From 04c4ccd6b9ddf6682cd9bc7088ac8d15e9882f81 Mon Sep 17 00:00:00 2001 From: Florian Obser Date: Fri, 20 Aug 2021 09:09:48 +0200 Subject: [PATCH] add archive_media configuration option With the archive_media option set to true, media attachments are archived together with the toot. Resolves #72 --- docs/install.md | 1 + ephemetoot/ephemetoot.py | 25 +++++++++++++++++++++++++ example-config.yaml | 1 + 3 files changed, 27 insertions(+) diff --git a/docs/install.md b/docs/install.md index 03fd64b..788d463 100644 --- a/docs/install.md +++ b/docs/install.md @@ -66,6 +66,7 @@ Calling `--init` will save your configuration file as `config.yaml` in the curre | hashtags_to_keep | A list of hashtags, where any toots with any of these hashtags will be kept regardless of age. Do not include the '#' symbol. Do remember the [rules for hashtags](https://docs.joinmastodon.org/user/posting/#hashtags) | | visibility_to_keep | Toots with any of the visibility settings in this list will be kept regardless of age. Options are: `public`, `unlisted`, `private`, `direct`. | | archive | A string representing the filepath to your toot archive. If this is provided, for every toot checked, the full toot is archived into individual files named by the toot's `id` in this writeable directory. Note that the default is for **all** toots to be archived, not just those that are being deleted. It is generally best to use an absolute file path - relative paths will not work if you call `ephemetoot` from another directory. | +| archive_media | Either `true` or `false` - if `true`, media attachments are archived when a toot is archived. | All values other than `access_token`, `username` and `base_url` are optional, however if you include `toots_to_keep`, `hashtags_to_keep`, or `visibility_to_keep` you must make each a list, even if it is empty: diff --git a/ephemetoot/ephemetoot.py b/ephemetoot/ephemetoot.py index 8002de1..e3be676 100644 --- a/ephemetoot/ephemetoot.py +++ b/ephemetoot/ephemetoot.py @@ -3,6 +3,7 @@ from datetime import date, datetime, timedelta, timezone import json import os import re +import urllib.parse import subprocess import sys import time @@ -206,6 +207,9 @@ def init(): tags, "Archive path", "(optional filepath for archive):" ) + if len(conf_archive) > 0: + conf_archive_media = yes_no_input(tags, "Archive media?") + # write out the config file with open("config.yaml", "w") as configfile: @@ -236,6 +240,7 @@ def init(): if len(conf_archive) > 0: configfile.write("\n archive: " + conf_archive) + configfile.write("\n archive_media: " + conf_archive_media) configfile.close() @@ -323,7 +328,22 @@ def schedule(options): print(e) +def archive_toot_media(archive_path, full_url): + url = urllib.parse.urlparse(full_url) + (dir_name, file_name) = os.path.split(url.path) + media_archive_path = os.path.join(archive_path, url.netloc, dir_name[1:]) + media_archive_file_path = os.path.join(media_archive_path, file_name) + if os.path.isfile(media_archive_file_path): + return + os.makedirs(media_archive_path, exist_ok=True) + r = requests.get(full_url) + with open(media_archive_file_path, "wb") as f: + f.write(r.content) + + def archive_toot(config, toot): + archive_media = "archive_media" in config and config["archive_media"] + # define archive path if config["archive"][0] == "~": archive_path = os.path.expanduser(config["archive"]) @@ -341,6 +361,11 @@ def archive_toot(config, toot): f.write(json.dumps(toot, indent=4, default=jsondefault)) f.close() + if archive_media and "media_attachments" in toot: + for media_attachment in toot["media_attachments"]: + if "url" in media_attachment: + archive_toot_media(archive_path, media_attachment["url"]) + def jsondefault(obj): if isinstance(obj, (date, datetime)): diff --git a/example-config.yaml b/example-config.yaml index 3f32d34..9511c99 100644 --- a/example-config.yaml +++ b/example-config.yaml @@ -28,6 +28,7 @@ - direct - private archive : Users/alice/toots_archive/ausglam/ + archive_media: true - # minimal example # values other than access_token, username, and base_url are all optional