Merge branch 'master' into fix-refresh
This commit is contained in:
commit
4054ebfffd
|
@ -1,5 +1,6 @@
|
|||
## v1.4.2
|
||||
|
||||
* implemented a more robust fetching algorithm, which should prevent accounts getting stuck with only a fraction of their posts fetched ([GH-13](https://github.com/codl/forget/issues/13))
|
||||
* fix: picture tags having an extra comma
|
||||
* fix: outdated joke in about page
|
||||
* fix: posts' status not getting refreshed (ie whether or not they were faved, or deleted externally)
|
||||
|
@ -33,7 +34,7 @@ Released 2018-05-08
|
|||
|
||||
Released 2018-05-08
|
||||
|
||||
* remember a user's mastodon instances and let them log in in one click (GH-36)
|
||||
* remember a user's mastodon instances and let them log in in one click ([GH-36](https://github.com/codl/forget/issues/36))
|
||||
|
||||
## v1.1.3
|
||||
|
||||
|
|
|
@ -101,13 +101,11 @@ def get_api_for_acc(account):
|
|||
except (MastodonNetworkError,
|
||||
MastodonRatelimitError) as e:
|
||||
raise TemporaryError(e)
|
||||
raise TemporaryError('No access to account {}'.format(account))
|
||||
|
||||
|
||||
def fetch_acc(acc, cursor=None):
|
||||
def fetch_posts(acc, max_id, since_id):
|
||||
api = get_api_for_acc(acc)
|
||||
if not api:
|
||||
print('no access, aborting')
|
||||
return None
|
||||
|
||||
try:
|
||||
newacc = account_from_api_object(
|
||||
|
@ -115,32 +113,15 @@ def fetch_acc(acc, cursor=None):
|
|||
acc = db.session.merge(newacc)
|
||||
|
||||
kwargs = dict(limit=40)
|
||||
if cursor:
|
||||
kwargs.update(cursor)
|
||||
|
||||
if 'max_id' not in kwargs:
|
||||
most_recent_post = (
|
||||
Post.query.with_parent(acc)
|
||||
.order_by(db.desc(Post.created_at)).first())
|
||||
if most_recent_post:
|
||||
kwargs['since_id'] = most_recent_post.mastodon_id
|
||||
if max_id:
|
||||
kwargs['max_id'] = max_id
|
||||
if since_id:
|
||||
kwargs['since_id'] = since_id
|
||||
|
||||
statuses = api.account_statuses(acc.mastodon_id, **kwargs)
|
||||
|
||||
if statuses:
|
||||
for status in statuses:
|
||||
post = post_from_api_object(status, acc.mastodon_instance)
|
||||
db.session.merge(post)
|
||||
if 'max_id' not in kwargs:
|
||||
kwargs['max_id'] = int(status['id'])
|
||||
kwargs['max_id'] = min(int(kwargs['max_id']), int(status['id']))
|
||||
return [post_from_api_object(status, acc.mastodon_instance) for status in statuses]
|
||||
|
||||
else:
|
||||
kwargs = None
|
||||
|
||||
db.session.commit()
|
||||
|
||||
return kwargs
|
||||
except (MastodonAPIError,
|
||||
MastodonNetworkError,
|
||||
MastodonRatelimitError) as e:
|
||||
|
@ -177,8 +158,6 @@ def account_from_api_object(obj, instance):
|
|||
def refresh_posts(posts):
|
||||
acc = posts[0].author
|
||||
api = get_api_for_acc(acc)
|
||||
if not api:
|
||||
raise Exception('no access')
|
||||
|
||||
new_posts = list()
|
||||
for post in posts:
|
||||
|
|
|
@ -92,7 +92,7 @@ def get_twitter_for_acc(account):
|
|||
except URLError as e:
|
||||
raise TemporaryError(e)
|
||||
|
||||
return None
|
||||
raise TemporaryError("No access to account {}".format(account))
|
||||
|
||||
|
||||
locale.setlocale(locale.LC_TIME, 'C')
|
||||
|
@ -123,11 +123,8 @@ def post_from_api_tweet_object(tweet, post=None):
|
|||
return post
|
||||
|
||||
|
||||
def fetch_acc(account, cursor):
|
||||
def fetch_posts(account, max_id, since_id):
|
||||
t = get_twitter_for_acc(account)
|
||||
if not t:
|
||||
print("no twitter access, aborting")
|
||||
return
|
||||
|
||||
try:
|
||||
user = t.account.verify_credentials()
|
||||
|
@ -139,45 +136,22 @@ def fetch_acc(account, cursor):
|
|||
'trim_user': True,
|
||||
'tweet_mode': 'extended',
|
||||
}
|
||||
if cursor:
|
||||
kwargs.update(cursor)
|
||||
|
||||
if 'max_id' not in kwargs:
|
||||
most_recent_post = (
|
||||
Post.query.order_by(db.desc(Post.created_at))
|
||||
.filter(Post.author_id == account.id).first())
|
||||
if most_recent_post:
|
||||
kwargs['since_id'] = most_recent_post.twitter_id
|
||||
if max_id:
|
||||
kwargs['max_id'] = max_id
|
||||
if since_id:
|
||||
kwargs['since_id'] = since_id
|
||||
|
||||
tweets = t.statuses.user_timeline(**kwargs)
|
||||
except (TwitterError, URLError) as e:
|
||||
handle_error(e)
|
||||
|
||||
print("processing {} tweets for {acc}".format(len(tweets), acc=account))
|
||||
|
||||
if len(tweets) > 0:
|
||||
|
||||
kwargs['max_id'] = +inf
|
||||
|
||||
for tweet in tweets:
|
||||
db.session.merge(post_from_api_tweet_object(tweet))
|
||||
kwargs['max_id'] = min(tweet['id'] - 1, kwargs['max_id'])
|
||||
|
||||
else:
|
||||
kwargs = None
|
||||
|
||||
db.session.commit()
|
||||
|
||||
return kwargs
|
||||
|
||||
return [post_from_api_tweet_object(tweet) for tweet in tweets]
|
||||
|
||||
def refresh_posts(posts):
|
||||
if not posts:
|
||||
return posts
|
||||
|
||||
t = get_twitter_for_acc(posts[0].author)
|
||||
if not t:
|
||||
return
|
||||
try:
|
||||
tweets = t.statuses.lookup(
|
||||
_id=",".join((post.twitter_id for post in posts)),
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
"""new fetching flags
|
||||
|
||||
Revision ID: 4b56cde3ebd7
|
||||
Revises: c136aa1157f9
|
||||
Create Date: 2019-02-24 11:53:29.128983
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '4b56cde3ebd7'
|
||||
down_revision = 'c136aa1157f9'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
op.add_column('accounts', sa.Column('fetch_current_batch_end_id', sa.String(), nullable=True))
|
||||
op.add_column('accounts', sa.Column('fetch_history_complete', sa.Boolean(), server_default='FALSE', nullable=False))
|
||||
op.create_foreign_key(op.f('fk_accounts_fetch_current_batch_end_id_posts'), 'accounts', 'posts', ['fetch_current_batch_end_id'], ['id'], ondelete='SET NULL')
|
||||
|
||||
|
||||
def downgrade():
|
||||
op.drop_constraint(op.f('fk_accounts_fetch_current_batch_end_id_posts'), 'accounts', type_='foreignkey')
|
||||
op.drop_column('accounts', 'fetch_history_complete')
|
||||
op.drop_column('accounts', 'fetch_current_batch_end_id')
|
28
model.py
28
model.py
|
@ -4,6 +4,7 @@ from app import db
|
|||
import secrets
|
||||
from libforget.interval import decompose_interval
|
||||
import random
|
||||
from sqlalchemy.ext.declarative import declared_attr
|
||||
|
||||
|
||||
class TimestampMixin(object):
|
||||
|
@ -67,6 +68,13 @@ class RemoteIDMixin(object):
|
|||
def mastodon_id(self, id_):
|
||||
self.id = "mastodon:{}@{}".format(id_, self.mastodon_instance)
|
||||
|
||||
@property
|
||||
def remote_id(self):
|
||||
if self.service == 'twitter':
|
||||
return self.twitter_id
|
||||
elif self.service == 'mastodon':
|
||||
return self.mastodon_id
|
||||
|
||||
|
||||
ThreeWayPolicyEnum = db.Enum('keeponly', 'deleteonly', 'none',
|
||||
name='enum_3way_policy')
|
||||
|
@ -105,6 +113,19 @@ class Account(TimestampMixin, RemoteIDMixin):
|
|||
last_delete = db.Column(db.DateTime(timezone=True), index=True)
|
||||
next_delete = db.Column(db.DateTime(timezone=True), index=True)
|
||||
|
||||
fetch_history_complete = db.Column(db.Boolean, server_default='FALSE',
|
||||
nullable=False)
|
||||
|
||||
@declared_attr
|
||||
def fetch_current_batch_end_id(cls):
|
||||
return db.Column(db.String, db.ForeignKey('posts.id', ondelete='SET NULL'))
|
||||
@declared_attr
|
||||
def fetch_current_batch_end(cls):
|
||||
return db.relationship("Post", foreign_keys=(cls.fetch_current_batch_end_id,))
|
||||
# the declared_attr is necessary because of the foreign key
|
||||
# and because this class is technically one big mixin
|
||||
# https://docs.sqlalchemy.org/en/latest/orm/extensions/declarative/mixins.html#mixing-in-relationships
|
||||
|
||||
reason = db.Column(db.String)
|
||||
dormant = db.Column(db.Boolean, server_default='FALSE', nullable=False)
|
||||
|
||||
|
@ -175,7 +196,7 @@ class Account(TimestampMixin, RemoteIDMixin):
|
|||
# backref: sessions
|
||||
|
||||
def post_count(self):
|
||||
return Post.query.with_parent(self).count()
|
||||
return Post.query.with_parent(self, 'posts').count()
|
||||
|
||||
def estimate_eligible_for_delete(self):
|
||||
"""
|
||||
|
@ -184,10 +205,10 @@ class Account(TimestampMixin, RemoteIDMixin):
|
|||
refresh every single post every time we need to know how many posts are
|
||||
eligible to delete
|
||||
"""
|
||||
latest_n_posts = (Post.query.with_parent(self)
|
||||
latest_n_posts = (Post.query.with_parent(self, 'posts')
|
||||
.order_by(db.desc(Post.created_at))
|
||||
.limit(self.policy_keep_latest))
|
||||
query = (Post.query.with_parent(self)
|
||||
query = (Post.query.with_parent(self, 'posts')
|
||||
.filter(Post.created_at <=
|
||||
db.func.now() - self.policy_keep_younger)
|
||||
.except_(latest_n_posts))
|
||||
|
@ -273,6 +294,7 @@ class Post(db.Model, TimestampMixin, RemoteIDMixin):
|
|||
nullable=False)
|
||||
author = db.relationship(
|
||||
Account,
|
||||
foreign_keys = (author_id,),
|
||||
backref=db.backref('posts',
|
||||
order_by=lambda: db.desc(Post.created_at)))
|
||||
|
||||
|
|
89
tasks.py
89
tasks.py
|
@ -67,13 +67,15 @@ def unique(fun):
|
|||
key = 'celery_unique_lock:{}'.format(
|
||||
pickle.dumps((fun.__name__, args, kwargs)))
|
||||
has_lock = False
|
||||
result = None
|
||||
try:
|
||||
if r.set(key, 1, nx=True, ex=60 * 5):
|
||||
has_lock = True
|
||||
return fun(*args, **kwargs)
|
||||
result = fun(*args, **kwargs)
|
||||
finally:
|
||||
if has_lock:
|
||||
r.delete(key)
|
||||
return result
|
||||
|
||||
return wrapper
|
||||
|
||||
|
@ -94,19 +96,82 @@ def make_dormant(acc):
|
|||
|
||||
|
||||
@app.task(autoretry_for=(TemporaryError, ))
|
||||
@unique
|
||||
def fetch_acc(id_, cursor=None):
|
||||
def fetch_acc(id_):
|
||||
account = Account.query.get(id_)
|
||||
print(f'fetching {account}')
|
||||
try:
|
||||
action = noop
|
||||
if not account.fetch_history_complete:
|
||||
oldest = (db.session.query(Post)
|
||||
.with_parent(account, 'posts')
|
||||
.order_by(db.asc(Post.created_at))
|
||||
.first())
|
||||
# ^ None if this is our first fetch ever, otherwise the oldest post
|
||||
if oldest:
|
||||
max_id = oldest.remote_id
|
||||
else:
|
||||
max_id = None
|
||||
since_id = None
|
||||
elif account.fetch_current_batch_end:
|
||||
oldest = (db.session.query(Post)
|
||||
.with_parent(account, 'posts')
|
||||
.filter(Post.created_at > account.fetch_current_batch_end.created_at)
|
||||
.order_by(db.asc(Post.created_at))
|
||||
.first())
|
||||
# ^ None if this is our first fetch of this batch, otherwise oldest of this batch
|
||||
if oldest:
|
||||
max_id = oldest.remote_id
|
||||
else:
|
||||
max_id = None
|
||||
since_id = account.fetch_current_batch_end.remote_id
|
||||
else:
|
||||
# we shouldn't get here unless the user had no posts on the service last time we fetched
|
||||
max_id = None
|
||||
latest = (db.session.query(Post)
|
||||
.with_parent(account, 'posts')
|
||||
.order_by(db.desc(Post.created_at))
|
||||
.limit(1)
|
||||
.scalar())
|
||||
# ^ should be None considering the user has no posts
|
||||
# will be the latest post in the off chance that something goes weird
|
||||
if latest:
|
||||
since_id = latest.remote_id
|
||||
else:
|
||||
since_id = None
|
||||
|
||||
|
||||
print('max_id: {}, since_id: {}'.format(max_id, since_id))
|
||||
fetch_posts = noop
|
||||
if (account.service == 'twitter'):
|
||||
action = libforget.twitter.fetch_acc
|
||||
fetch_posts = libforget.twitter.fetch_posts
|
||||
elif (account.service == 'mastodon'):
|
||||
action = libforget.mastodon.fetch_acc
|
||||
cursor = action(account, cursor)
|
||||
if cursor:
|
||||
fetch_acc.si(id_, cursor).apply_async()
|
||||
fetch_posts = libforget.mastodon.fetch_posts
|
||||
posts = fetch_posts(account, max_id, since_id)
|
||||
|
||||
if posts is None:
|
||||
# ???
|
||||
raise TemporaryError("Fetching posts went horribly wrong")
|
||||
|
||||
if len(posts) == 0:
|
||||
# we either finished the historic fetch
|
||||
# or we finished the current batch
|
||||
account.fetch_history_complete = True
|
||||
account.fetch_current_batch_end_id = (Post.query
|
||||
.with_parent(account, 'posts').order_by(db.desc(Post.created_at)).first()).id
|
||||
# ^ note that this could be None if the user has no posts
|
||||
# this is okay
|
||||
|
||||
db.session.commit()
|
||||
|
||||
else:
|
||||
for post in posts:
|
||||
db.session.merge(post)
|
||||
db.session.commit()
|
||||
|
||||
if not account.fetch_history_complete:
|
||||
# reschedule immediately if we're still doing the historic fetch
|
||||
fetch_acc.apply_async((id_,))
|
||||
|
||||
|
||||
except TemporaryError:
|
||||
db.session.rollback()
|
||||
account.backoff()
|
||||
|
@ -162,11 +227,11 @@ def delete_from_account(account_id):
|
|||
if account.next_delete > datetime.now(timezone.utc):
|
||||
return
|
||||
|
||||
latest_n_posts = (Post.query.with_parent(account).order_by(
|
||||
latest_n_posts = (Post.query.with_parent(account, 'posts').order_by(
|
||||
db.desc(Post.created_at)).limit(account.policy_keep_latest)
|
||||
.cte(name='latest'))
|
||||
posts = (
|
||||
Post.query.with_parent(account)
|
||||
Post.query.with_parent(account, 'posts')
|
||||
.filter(Post.created_at + account.policy_keep_younger <= db.func.now())
|
||||
.filter(~Post.id.in_(db.select((latest_n_posts.c.id, )))).order_by(
|
||||
db.func.random()).limit(100).with_for_update().all())
|
||||
|
@ -237,7 +302,7 @@ def refresh_account(account_id):
|
|||
limit = 100
|
||||
if account.service == 'mastodon':
|
||||
limit = 3
|
||||
posts = (Post.query.with_parent(account).order_by(
|
||||
posts = (Post.query.with_parent(account, 'posts').order_by(
|
||||
db.asc(Post.updated_at)).limit(limit).all())
|
||||
|
||||
posts = refresh_posts(posts)
|
||||
|
|
Loading…
Reference in New Issue