2
0
mirror of https://github.com/codl/forget synced 2025-01-23 20:21:03 +01:00
forget-cancellare-vecchi-toot/model.py
codl a6c5361138
more robust fetching. closes #13
this also refactors libforget so neither the twitter or mastodon lib
insert posts directly
2019-02-24 15:40:41 +01:00

370 lines
13 KiB
Python

from datetime import timedelta, datetime, timezone
from app import db
import secrets
from libforget.interval import decompose_interval
import random
from sqlalchemy.ext.declarative import declared_attr
class TimestampMixin(object):
created_at = db.Column(
db.DateTime(timezone=True), server_default=db.func.now(),
nullable=False)
updated_at = db.Column(
db.DateTime(timezone=True), server_default=db.func.now(),
onupdate=db.func.now(), nullable=False)
def touch(self):
self.updated_at = db.func.now()
class RemoteIDMixin(object):
@property
def service(self):
if not self.id:
return None
return self.id.split(":")[0]
@property
def twitter_id(self):
if not self.id:
return None
if self.service != "twitter":
raise Exception(
"tried to get twitter id for a {} {}"
.format(self.service, type(self)))
return self.id.split(":")[1]
@twitter_id.setter
def twitter_id(self, id_):
self.id = "twitter:{}".format(id_)
@property
def mastodon_instance(self):
if not self.id:
return None
if self.service != "mastodon":
raise Exception(
"tried to get mastodon instance for a {} {}"
.format(self.service, type(self)))
return self.id.split(":", 1)[1].split('@')[1]
@mastodon_instance.setter
def mastodon_instance(self, instance):
self.id = "mastodon:{}@{}".format(self.mastodon_id, instance)
@property
def mastodon_id(self):
if not self.id:
return None
if self.service != "mastodon":
raise Exception(
"tried to get mastodon id for a {} {}"
.format(self.service, type(self)))
return self.id.split(":", 1)[1].split('@')[0]
@mastodon_id.setter
def mastodon_id(self, id_):
self.id = "mastodon:{}@{}".format(id_, self.mastodon_instance)
@property
def remote_id(self):
if self.service == 'twitter':
return self.twitter_id
elif self.service == 'mastodon':
return self.mastodon_id
ThreeWayPolicyEnum = db.Enum('keeponly', 'deleteonly', 'none',
name='enum_3way_policy')
@decompose_interval('policy_delete_every')
@decompose_interval('policy_keep_younger')
class Account(TimestampMixin, RemoteIDMixin):
__tablename__ = 'accounts'
id = db.Column(db.String, primary_key=True)
policy_enabled = db.Column(db.Boolean, server_default='FALSE',
nullable=False)
policy_keep_latest = db.Column(db.Integer, server_default='100',
nullable=False)
policy_keep_favourites = db.Column(ThreeWayPolicyEnum,
server_default='none', nullable=False)
policy_keep_media = db.Column(ThreeWayPolicyEnum, server_default='none',
nullable=False)
policy_delete_every = db.Column(db.Interval, server_default='30 minutes',
nullable=False)
policy_keep_younger = db.Column(db.Interval, server_default='365 days',
nullable=False)
policy_keep_direct = db.Column(db.Boolean, server_default='TRUE',
nullable=False)
display_name = db.Column(db.String)
screen_name = db.Column(db.String)
avatar_url = db.Column(db.String)
reported_post_count = db.Column(db.Integer)
last_fetch = db.Column(db.DateTime(timezone=True),
server_default='epoch', index=True)
last_refresh = db.Column(db.DateTime(timezone=True),
server_default='epoch', index=True)
last_delete = db.Column(db.DateTime(timezone=True), index=True)
next_delete = db.Column(db.DateTime(timezone=True), index=True)
fetch_history_complete = db.Column(db.Boolean, server_default='FALSE',
nullable=False)
@declared_attr
def fetch_current_batch_end_id(cls):
return db.Column(db.String, db.ForeignKey('posts.id', ondelete='SET NULL'))
@declared_attr
def fetch_current_batch_end(cls):
return db.relationship("Post", foreign_keys=(cls.fetch_current_batch_end_id,))
# the declared_attr is necessary because of the foreign key
# and because this class is technically one big mixin
# https://docs.sqlalchemy.org/en/latest/orm/extensions/declarative/mixins.html#mixing-in-relationships
reason = db.Column(db.String)
dormant = db.Column(db.Boolean, server_default='FALSE', nullable=False)
backoff_level = db.Column(db.Integer, server_default='0', nullable=False)
backoff_until = db.Column(db.DateTime(timezone=True), server_default='now', nullable=False)
BACKOFF_MAX = 14
# backoff is 10 seconds * 2^backoff_level
# this gives us roughly 1.8 days at level 14
def touch_fetch(self):
self.last_fetch = db.func.now()
def touch_delete(self):
self.last_delete = db.func.now()
# if it's been more than 1 delete cycle ago that we've deleted a post,
# reset next_delete to be 1 cycle away
if (datetime.now(timezone.utc) - self.next_delete
> self.policy_delete_every):
self.next_delete = db.func.now() + self.policy_delete_every
else:
self.next_delete += self.policy_delete_every
def touch_refresh(self):
self.last_refresh = db.func.now()
def get_avatar(self):
from app import imgproxy
from flask import url_for
return url_for('avatar', identifier=imgproxy.identifier_for(self.avatar_url))
@db.validates('policy_keep_younger', 'policy_delete_every')
def validate_intervals(self, key, value):
if not (value == timedelta(0) or value >= timedelta(minutes=1)):
value = timedelta(minutes=1)
if key == 'policy_delete_every' and \
self.next_delete and\
datetime.now(timezone.utc) + value < self.next_delete:
# make sure that next delete is not in the far future
self.next_delete = datetime.now(timezone.utc) + value
return value
# pylint: disable=R0201
@db.validates('policy_keep_latest')
def validate_empty_string_is_zero(self, key, value):
if isinstance(value, str) and value.strip() == '':
return 0
return value
@db.validates('policy_enabled')
def on_policy_enable(self, key, enable):
if not self.policy_enabled and enable:
self.next_delete = (
datetime.now(timezone.utc) + self.policy_delete_every)
self.reason = None
self.dormant = False
return enable
@db.validates('policy_keep_direct')
def validate_bool_accept_string(self, key, value):
if isinstance(value, str):
return value.lower() == 'true'
return value
# backref: tokens
# backref: twitter_archives
# backref: posts
# backref: sessions
def post_count(self):
return Post.query.with_parent(self, 'posts').count()
def estimate_eligible_for_delete(self):
"""
this is an estimation because we do not know if favourite status has
changed since last time a post was refreshed and it is unfeasible to
refresh every single post every time we need to know how many posts are
eligible to delete
"""
latest_n_posts = (Post.query.with_parent(self, 'posts')
.order_by(db.desc(Post.created_at))
.limit(self.policy_keep_latest))
query = (Post.query.with_parent(self, 'posts')
.filter(Post.created_at <=
db.func.now() - self.policy_keep_younger)
.except_(latest_n_posts))
if(self.policy_keep_favourites != 'none'):
query = query.filter(db.or_(
Post.favourite == (self.policy_keep_favourites == 'deleteonly'),
Post.is_reblog))
if(self.policy_keep_media != 'none'):
query = query.filter(db.or_(
Post.has_media == (self.policy_keep_media == 'deleteonly'),
Post.is_reblog))
if(self.policy_keep_direct):
query = query.filter(~Post.direct)
return query.count()
def force_log_out(self):
Session.query.with_parent(self).delete()
db.session.commit()
def backoff(self):
self.backoff_level = min(self.backoff_level + 1, self.BACKOFF_MAX)
backoff_for = 10 * 2 ** self.backoff_level
backoff_for *= random.uniform(1, 1.3)
self.backoff_until = datetime.utcnow() + timedelta(seconds=backoff_for)
def reset_backoff(self):
self.backoff_until = datetime.utcnow()
self.backoff_level = 0
class Account(Account, db.Model):
def __str__(self):
return f"<Account({self.id}, {self.screen_name}, {self.display_name})>"
class OAuthToken(db.Model, TimestampMixin):
__tablename__ = 'oauth_tokens'
token = db.Column(db.String, primary_key=True)
token_secret = db.Column(db.String, nullable=True)
account_id = db.Column(db.String,
db.ForeignKey('accounts.id', ondelete='CASCADE',
onupdate='CASCADE'),
nullable=True, index=True)
account = db.relationship(
Account,
backref=db.backref('tokens',
order_by=lambda: db.desc(OAuthToken.created_at))
)
# note: account_id is nullable here because we don't know what account a
# token is for until we call /account/verify_credentials with it
class Session(db.Model, TimestampMixin):
__tablename__ = 'sessions'
id = db.Column(db.String, primary_key=True,
default=secrets.token_urlsafe)
account_id = db.Column(
db.String,
db.ForeignKey('accounts.id',
ondelete='CASCADE', onupdate='CASCADE'),
nullable=False, index=True)
account = db.relationship(Account, lazy='joined', backref='sessions')
csrf_token = db.Column(db.String,
default=secrets.token_urlsafe,
nullable=False)
class Post(db.Model, TimestampMixin, RemoteIDMixin):
__tablename__ = 'posts'
id = db.Column(db.String, primary_key=True)
author_id = db.Column(
db.String,
db.ForeignKey('accounts.id',
ondelete='CASCADE', onupdate='CASCADE'),
nullable=False)
author = db.relationship(
Account,
foreign_keys = (author_id,),
backref=db.backref('posts',
order_by=lambda: db.desc(Post.created_at)))
favourite = db.Column(db.Boolean, server_default='FALSE', nullable=False)
has_media = db.Column(db.Boolean, server_default='FALSE', nullable=False)
direct = db.Column(db.Boolean, server_default='FALSE', nullable=False)
favourites = db.Column(db.Integer)
reblogs = db.Column(db.Integer)
is_reblog = db.Column(db.Boolean, server_default='FALSE', nullable=False)
def __str__(self):
return '<Post ({}, Author: {})>'.format(self.id, self.author_id)
db.Index('ix_posts_author_id_created_at', Post.author_id, Post.created_at)
class TwitterArchive(db.Model, TimestampMixin):
__tablename__ = 'twitter_archives'
id = db.Column(db.Integer, primary_key=True)
account_id = db.Column(
db.String,
db.ForeignKey('accounts.id',
onupdate='CASCADE', ondelete='CASCADE'),
nullable=False)
account = db.relationship(
Account,
backref=db.backref('twitter_archives',
order_by=lambda: db.desc(TwitterArchive.id)))
body = db.deferred(db.Column(db.LargeBinary, nullable=False))
chunks = db.Column(db.Integer)
chunks_successful = db.Column(db.Integer,
server_default='0', nullable=False)
chunks_failed = db.Column(db.Integer, server_default='0', nullable=False)
def status(self):
if self.chunks is None or self.chunks_failed > 0:
return 'failed'
if self.chunks_successful == self.chunks:
return 'successful'
return 'pending'
ProtoEnum = db.Enum('http', 'https', name='enum_protocol')
class MastodonApp(db.Model, TimestampMixin):
__tablename__ = 'mastodon_apps'
instance = db.Column(db.String, primary_key=True)
client_id = db.Column(db.String, nullable=False)
client_secret = db.Column(db.String, nullable=False)
protocol = db.Column(ProtoEnum, nullable=False)
class MastodonInstance(db.Model):
"""
this is for the autocomplete in the mastodon login form
it isn't coupled with anything else so that we can seed it with
some popular instances ahead of time
"""
__tablename__ = 'mastodon_instances'
instance = db.Column(db.String, primary_key=True)
popularity = db.Column(db.Float, server_default='10', nullable=False)
def bump(self, value=1):
self.popularity = (self.popularity or 10) + value