From 4dc0547dc0e80a4289f46cd8ee5b3aaf855f1f1e Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Mon, 30 Oct 2023 18:35:11 +0100 Subject: [PATCH] [feature] Customizable media cleaner schedule (#2304) --- cmd/gotosocial/action/server/server.go | 25 ++++--- cmd/gotosocial/action/testrig/testrig.go | 7 +- docs/admin/media_caching.md | 57 +++++++++++++++ docs/admin/settings.md | 2 +- docs/configuration/media.md | 42 ++++++++--- example/config.yaml | 42 ++++++++--- .../wellknown/webfinger/webfingerget_test.go | 3 +- internal/cleaner/cleaner.go | 58 ++++++++++++--- internal/config/config.go | 2 + internal/config/defaults.go | 2 + internal/config/flags.go | 2 + internal/config/helpers.gen.go | 50 +++++++++++++ internal/gotosocial/gotosocial.go | 71 ++++++++----------- internal/processing/admin/admin.go | 11 ++- internal/processing/admin/admin_test.go | 2 + internal/processing/processor.go | 4 +- internal/processing/processor_test.go | 3 +- internal/processing/workers/workers_test.go | 3 +- mkdocs.yml | 1 + test/envparsing.sh | 2 + testrig/config.go | 6 +- testrig/processor.go | 3 +- 22 files changed, 300 insertions(+), 98 deletions(-) create mode 100644 docs/admin/media_caching.md diff --git a/cmd/gotosocial/action/server/server.go b/cmd/gotosocial/action/server/server.go index 2e32ec657..3b21c7ebe 100644 --- a/cmd/gotosocial/action/server/server.go +++ b/cmd/gotosocial/action/server/server.go @@ -32,6 +32,7 @@ import ( "github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action" "github.com/superseriousbusiness/gotosocial/internal/api" apiutil "github.com/superseriousbusiness/gotosocial/internal/api/util" + "github.com/superseriousbusiness/gotosocial/internal/cleaner" "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/middleware" tlprocessor "github.com/superseriousbusiness/gotosocial/internal/processing/timeline" @@ -173,8 +174,19 @@ var Start action.GTSAction = 
func(ctx context.Context) error { return fmt.Errorf("error starting list timeline: %s", err) } + // Create a media cleaner using the given state. + cleaner := cleaner.New(&state) + // Create the processor using all the other services we've created so far. - processor := processing.NewProcessor(typeConverter, federator, oauthServer, mediaManager, &state, emailSender) + processor := processing.NewProcessor( + cleaner, + typeConverter, + federator, + oauthServer, + mediaManager, + &state, + emailSender, + ) // Set state client / federator asynchronous worker enqueue functions state.Workers.EnqueueClientAPI = processor.Workers().EnqueueClientAPI @@ -297,12 +309,9 @@ var Start action.GTSAction = func(ctx context.Context) error { activityPubModule.RoutePublicKey(router, s2sLimit, pkThrottle, gzip) webModule.Route(router, fsLimit, fsThrottle, gzip) - gts, err := gotosocial.NewServer(dbService, router, federator, mediaManager) - if err != nil { - return fmt.Errorf("error creating gotosocial service: %s", err) - } - - if err := gts.Start(ctx); err != nil { + // Start the GoToSocial server. 
+ server := gotosocial.NewServer(dbService, router, cleaner) + if err := server.Start(ctx); err != nil { return fmt.Errorf("error starting gotosocial service: %s", err) } @@ -313,7 +322,7 @@ var Start action.GTSAction = func(ctx context.Context) error { log.Infof(ctx, "received signal %s, shutting down", sig) // close down all running services in order - if err := gts.Stop(ctx); err != nil { + if err := server.Stop(ctx); err != nil { return fmt.Errorf("error closing gotosocial service: %s", err) } diff --git a/cmd/gotosocial/action/testrig/testrig.go b/cmd/gotosocial/action/testrig/testrig.go index 288c5975d..f125cd119 100644 --- a/cmd/gotosocial/action/testrig/testrig.go +++ b/cmd/gotosocial/action/testrig/testrig.go @@ -32,6 +32,7 @@ import ( "github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action" "github.com/superseriousbusiness/gotosocial/internal/api" apiutil "github.com/superseriousbusiness/gotosocial/internal/api/util" + "github.com/superseriousbusiness/gotosocial/internal/cleaner" "github.com/superseriousbusiness/gotosocial/internal/config" "github.com/superseriousbusiness/gotosocial/internal/gotosocial" "github.com/superseriousbusiness/gotosocial/internal/gtserror" @@ -211,11 +212,9 @@ var Start action.GTSAction = func(ctx context.Context) error { activityPubModule.RoutePublicKey(router) webModule.Route(router) - gts, err := gotosocial.NewServer(state.DB, router, federator, mediaManager) - if err != nil { - return fmt.Errorf("error creating gotosocial service: %s", err) - } + cleaner := cleaner.New(&state) + gts := gotosocial.NewServer(state.DB, router, cleaner) if err := gts.Start(ctx); err != nil { return fmt.Errorf("error starting gotosocial service: %s", err) } diff --git a/docs/admin/media_caching.md b/docs/admin/media_caching.md new file mode 100644 index 000000000..bbd8b8995 --- /dev/null +++ b/docs/admin/media_caching.md @@ -0,0 +1,57 @@ +# Media Caching + +GoToSocial uses the configured [storage 
backend](https://docs.gotosocial.org/en/latest/configuration/storage/) in order to store media (images, videos, etc) uploaded to the instance by local users, as well as to cache media attached to posts and profiles federated in from remote instances. + +Media uploaded by local instance users will be kept in storage forever (unless the post or profile it's attached to is deleted), so that it's always available to be served in response to requests coming from remote instances. + +Remote media, on the other hand, is cached only temporarily. After a certain amount of time (see below), it will be removed from storage to help alleviate storage space usage. Remote media uncached this way will be re-fetched automatically from the remote instance if it's needed again. + +!!! info "Why cache?" + There is an argument to be made for not caching remote media at all, since it's always available on the origin server. Why not just forego caching entirely, and rely on the remote instance to serve everything on demand? + + While this is a straightforward approach to saving storage space, it can cause other problems and is generally considered to be rather impolite. + + For example, say someone from a small instance makes a funny post with an image attached. The post gets boosted by an account that's followed by 1,000 people across 5 different instances (200 on each instance). Each of those 1,000 people then have the image put in their timeline at once. + + With no remote media caching in place, this may cause up to 1,000 requests to hit the small instance simultaneously, as the browser of each recipient of the post must go and make a unique request to fetch the image from the small instance. This causes a large traffic spike for the small instance. In extreme scenarios, this can cause the instance to become unresponsive or crash, essentially DDOS'ing it. 
+ + With remote media caching in place, however, boosting a post to 1,000 people across 5 different instances will cause only 5 requests to the small instance: 1 request for each instance. Each instance will then serve 200 requests to its local users from the cached version of the remote image, effectively spreading the load and sparing the smaller instance. + +## Cleanup + +Cleanup of the remote media cache occurs as a scheduled background process, and no manual intervention is required by admins. Cleanup takes somewhere between 5-30 minutes depending on the speed of the server, the speed of the configured storage, and the amount of media to work through. + +GoToSocial exposes three variables that let you, the admin, tune when and how this work is performed: `media-remote-cache-days`, `media-cleanup-from` and `media-cleanup-every`. + +By default, these variables are set to the following values: + +| Variable name | Default | Meaning | +|---------------------------|--------------|----------| +| `media-remote-cache-days` | `7` | 7 days | +| `media-cleanup-from` | `"00:00"` | midnight | +| `media-cleanup-every` | `"24h"` | daily | + +In other words, the default settings mean that every night at midnight, remote media older than a week will be uncached and removed from storage. + +You can achieve different results by tuning these variables. For example, say you wanted to prune at 4.30am instead of midnight, you could change `media-cleanup-from` to `"04:30"`. + +If you only want to prune every couple of days instead of every night, you could set `media-cleanup-every` to a higher value, like `"48h"` or `"72h"`. 
+ +If you wanted to adopt a more aggressive cleanup strategy to minimize storage usage, you could set the following values: + +| Variable name | Setting | Meaning | +|---------------------------|--------------|-------------| +| `media-remote-cache-days` | `1` | 1 day | +| `media-cleanup-from` | `"00:00"` | midnight | +| `media-cleanup-every` | `"8h"` | every 8 hrs | + +The above settings would mean that every 8 hours starting from midnight, GoToSocial would prune any media older than 1 day (24hrs). The prune jobs would run at 00:00, 08:00, and 16:00, ie., midnight, 8am, and 4pm. With this configuration, the longest amount of time you could possibly keep remote media in your storage would be about 32 hours. + +!!! tip + Setting `media-remote-cache-days` to 0 or less means that remote media will never be uncached. However, cleanup jobs for orphaned local media and other consistency checks will still be run using the schedule defined by the other variables. + +!!! tip + You can also run cleanup manually as a one-off action through the admin panel, if you so wish ([see docs](./settings.md#media)). + +!!! warning + Setting `media-cleanup-every` to a very small value like `"30m"` or less will probably cause your instance to just constantly iterate through attachments, causing high database use for very little benefit. We don't recommend setting this value to less than about `"8h"` and even that is probably overkill. diff --git a/docs/admin/settings.md b/docs/admin/settings.md index 344a97473..e19bc91bc 100644 --- a/docs/admin/settings.md +++ b/docs/admin/settings.md @@ -68,7 +68,7 @@ Run one-off administrative actions. #### Media -You can use this section run a media action to clean up the remote media cache using the specified number of days. Media older than the given number of days will be removed from storage (s3 or local). Media removed in this way will be refetched again later if the media is required again. 
This action is functionally identical to the media cleanup that runs every night, automatically. +You can use this section to run a media action to clean up the remote media cache using the specified number of days. Media older than the given number of days will be removed from storage (s3 or local). Media removed in this way will be refetched again later if the media is required again. This action is functionally identical to the media cleanup that runs automatically. #### Keys diff --git a/docs/configuration/media.md b/docs/configuration/media.md index 96742ae63..2bbdff420 100644 --- a/docs/configuration/media.md +++ b/docs/configuration/media.md @@ -29,17 +29,6 @@ media-description-min-chars: 0 # Default: 500 media-description-max-chars: 500 -# Int. Number of days to cache media from remote instances before they are removed from the cache. -# A job will run every day at midnight to clean up any remote media older than the given amount of days. -# -# When remote media is removed from the cache, it is deleted from storage but the database entries for the media -# are kept so that it can be fetched again if requested by a user. -# -# If this is set to 0, then media from remote instances will be cached indefinitely. -# Examples: [30, 60, 7, 0] -# Default: 7 -media-remote-cache-days: 7 - # Int. Max size in bytes of emojis uploaded to this instance via the admin API. # The default is the same as the Mastodon size limit for emojis (50kb), which allows # for good interoperability. Raising this limit may cause issues with federation @@ -55,4 +44,35 @@ media-emoji-local-max-size: 51200 # Examples: [51200, 102400] # Default: 102400 media-emoji-remote-max-size: 102400 + +# The below media cleanup settings allow admins to customize when and +# how often media cleanup + prune jobs run, while being set to a fairly +# sensible default (every night @ midnight).
For more information on exactly +# what these settings do, with some customization examples, see the docs: +# https://docs.gotosocial.org/en/latest/admin/media_caching#cleanup + +# Int. Number of days to cache media from remote instances before +# they are removed from the cache. When remote media is removed from +# the cache, it is deleted from storage but the database entries for +# the media are kept so that it can be fetched again if requested by a user. +# +# If this is set to 0, then media from remote instances will be cached indefinitely. +# +# Examples: [30, 60, 7, 0] +# Default: 7 +media-remote-cache-days: 7 + +# String. 24hr time of day formatted as hh:mm. +# Examples: ["14:30", "00:00", "04:00"] +# Default: "00:00" (midnight). +media-cleanup-from: "00:00" + +# Duration. Period between media cleanup runs. +# More than once per 24h is not recommended +# and is likely overkill. Setting this to something +# very low like once every 10 minutes will probably +# cause lag and possibly other issues. +# Examples: ["24h", "72h", "12h"] +# Default: "24h" (once per day). +media-cleanup-every: "24h" ``` diff --git a/example/config.yaml b/example/config.yaml index 816339f8d..78f52099a 100644 --- a/example/config.yaml +++ b/example/config.yaml @@ -410,17 +410,6 @@ media-description-min-chars: 0 # Default: 500 media-description-max-chars: 500 -# Int. Number of days to cache media from remote instances before they are removed from the cache. -# A job will run every day at midnight to clean up any remote media older than the given amount of days. -# -# When remote media is removed from the cache, it is deleted from storage but the database entries for the media -# are kept so that it can be fetched again if requested by a user. -# -# If this is set to 0, then media from remote instances will be cached indefinitely. -# Examples: [30, 60, 7, 0] -# Default: 7 -media-remote-cache-days: 7 - # Int. Max size in bytes of emojis uploaded to this instance via the admin API.
# The default is the same as the Mastodon size limit for emojis (50kb), which allows # for good interoperability. Raising this limit may cause issues with federation @@ -437,6 +426,37 @@ media-emoji-local-max-size: 51200 # Default: 102400 media-emoji-remote-max-size: 102400 +# The below media cleanup settings allow admins to customize when and +# how often media cleanup + prune jobs run, while being set to a fairly +# sensible default (every night @ midnight). For more information on exactly +# what these settings do, with some customization examples, see the docs: +# https://docs.gotosocial.org/en/latest/admin/media_caching#cleanup + +# Int. Number of days to cache media from remote instances before +# they are removed from the cache. When remote media is removed from +# the cache, it is deleted from storage but the database entries for +# the media are kept so that it can be fetched again if requested by a user. +# +# If this is set to 0, then media from remote instances will be cached indefinitely. +# +# Examples: [30, 60, 7, 0] +# Default: 7 +media-remote-cache-days: 7 + +# String. 24hr time of day formatted as hh:mm. +# Examples: ["14:30", "00:00", "04:00"] +# Default: "00:00" (midnight). +media-cleanup-from: "00:00" + +# Duration. Period between media cleanup runs. +# More than once per 24h is not recommended +# and is likely overkill. Setting this to something +# very low like once every 10 minutes will probably +# cause lag and possibly other issues. +# Examples: ["24h", "72h", "12h"] +# Default: "24h" (once per day).
+media-cleanup-every: "24h" + ########################## ##### STORAGE CONFIG ##### ########################## diff --git a/internal/api/wellknown/webfinger/webfingerget_test.go b/internal/api/wellknown/webfinger/webfingerget_test.go index fb450470f..6b3e2cc5d 100644 --- a/internal/api/wellknown/webfinger/webfingerget_test.go +++ b/internal/api/wellknown/webfinger/webfingerget_test.go @@ -33,6 +33,7 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/ap" apiutil "github.com/superseriousbusiness/gotosocial/internal/api/util" "github.com/superseriousbusiness/gotosocial/internal/api/wellknown/webfinger" + "github.com/superseriousbusiness/gotosocial/internal/cleaner" "github.com/superseriousbusiness/gotosocial/internal/config" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/processing" @@ -82,7 +83,7 @@ func (suite *WebfingerGetTestSuite) funkifyAccountDomain(host string, accountDom // to new host + account domain. 
config.SetHost(host) config.SetAccountDomain(accountDomain) - suite.processor = processing.NewProcessor(suite.tc, suite.federator, testrig.NewTestOauthServer(suite.db), testrig.NewTestMediaManager(&suite.state), &suite.state, suite.emailSender) + suite.processor = processing.NewProcessor(cleaner.New(&suite.state), suite.tc, suite.federator, testrig.NewTestOauthServer(suite.db), testrig.NewTestMediaManager(&suite.state), &suite.state, suite.emailSender) suite.webfingerModule = webfinger.New(suite.processor) // Generate a new account for the diff --git a/internal/cleaner/cleaner.go b/internal/cleaner/cleaner.go index 31766bae6..1139a85bb 100644 --- a/internal/cleaner/cleaner.go +++ b/internal/cleaner/cleaner.go @@ -47,7 +47,6 @@ func New(state *state.State) *Cleaner { c.state = state c.emoji.Cleaner = c c.media.Cleaner = c - scheduleJobs(c) return c } @@ -109,16 +108,46 @@ func (c *Cleaner) removeFiles(ctx context.Context, files ...string) (int, error) return diff, nil } -func scheduleJobs(c *Cleaner) { - const day = time.Hour * 24 +// ScheduleJobs schedules cleaning +// jobs using configured parameters. +// +// Returns an error if `MediaCleanupFrom` +// is not a valid format (hh:mm). +func (c *Cleaner) ScheduleJobs() error { + const hourMinute = "15:04" - // Calculate closest midnight. - now := time.Now() - midnight := now.Round(day) + var ( + now = time.Now() + cleanupEvery = config.GetMediaCleanupEvery() + cleanupFromStr = config.GetMediaCleanupFrom() + ) - if midnight.Before(now) { - // since <= 11:59am rounds down. - midnight = midnight.Add(day) + // Parse cleanupFromStr as hh:mm. + // Resulting time will be on 1 Jan year zero. + cleanupFrom, err := time.Parse(hourMinute, cleanupFromStr) + if err != nil { + return gtserror.Newf( + "error parsing '%s' in time format 'hh:mm': %w", + cleanupFromStr, err, + ) + } + + // Time travel from + // year zero, groovy.
+ firstCleanupAt := time.Date( + now.Year(), + now.Month(), + now.Day(), + cleanupFrom.Hour(), + cleanupFrom.Minute(), + 0, + 0, + now.Location(), + ) + + // Ensure first cleanup is in the future. + for firstCleanupAt.Before(now) { + firstCleanupAt = firstCleanupAt.Add(cleanupEvery) } // Get ctx associated with scheduler run state. @@ -129,11 +158,18 @@ func scheduleJobs(c *Cleaner) { // jobs restartable if we want to implement reloads in // the future that make call to Workers.Stop() -> Workers.Start(). - // Schedule the cleaning tasks to execute every day at midnight. + log.Infof(nil, + "scheduling media clean to run every %s, starting from %s; next clean will run at %s", + cleanupEvery, cleanupFromStr, firstCleanupAt, + ) + + // Schedule the cleaning tasks to execute according to given schedule. c.state.Workers.Scheduler.Schedule(sched.NewJob(func(start time.Time) { log.Info(nil, "starting media clean") c.Media().All(doneCtx, config.GetMediaRemoteCacheDays()) c.Emoji().All(doneCtx, config.GetMediaRemoteCacheDays()) log.Infof(nil, "finished media clean after %s", time.Since(start)) - }).EveryAt(midnight, day)) + }).EveryAt(firstCleanupAt, cleanupEvery)) + + return nil } diff --git a/internal/config/config.go b/internal/config/config.go index a9fdef3c7..77e70185c 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -97,6 +97,8 @@ type Configuration struct { MediaRemoteCacheDays int `name:"media-remote-cache-days" usage:"Number of days to locally cache media from remote instances. 
If set to 0, remote media will be kept indefinitely."` MediaEmojiLocalMaxSize bytesize.Size `name:"media-emoji-local-max-size" usage:"Max size in bytes of emojis uploaded to this instance via the admin API."` MediaEmojiRemoteMaxSize bytesize.Size `name:"media-emoji-remote-max-size" usage:"Max size in bytes of emojis to download from other instances."` + MediaCleanupFrom string `name:"media-cleanup-from" usage:"Time of day from which to start running media cleanup/prune jobs. Should be in the format 'hh:mm:ss', eg., '15:04:05'."` + MediaCleanupEvery time.Duration `name:"media-cleanup-every" usage:"Period to elapse between cleanups, starting from media-cleanup-at."` StorageBackend string `name:"storage-backend" usage:"Storage backend to use for media attachments"` StorageLocalBasePath string `name:"storage-local-base-path" usage:"Full path to an already-created directory where gts should store/retrieve media files. Subfolders will be created within this dir."` diff --git a/internal/config/defaults.go b/internal/config/defaults.go index 6ee52d162..0c2556e9d 100644 --- a/internal/config/defaults.go +++ b/internal/config/defaults.go @@ -76,6 +76,8 @@ var Defaults = Configuration{ MediaRemoteCacheDays: 7, MediaEmojiLocalMaxSize: 50 * bytesize.KiB, MediaEmojiRemoteMaxSize: 100 * bytesize.KiB, + MediaCleanupFrom: "00:00", // Midnight. + MediaCleanupEvery: 24 * time.Hour, // 1/day. 
StorageBackend: "local", StorageLocalBasePath: "/gotosocial/storage", diff --git a/internal/config/flags.go b/internal/config/flags.go index 29e0726a6..b29d0fe04 100644 --- a/internal/config/flags.go +++ b/internal/config/flags.go @@ -103,6 +103,8 @@ func (s *ConfigState) AddServerFlags(cmd *cobra.Command) { cmd.Flags().Int(MediaRemoteCacheDaysFlag(), cfg.MediaRemoteCacheDays, fieldtag("MediaRemoteCacheDays", "usage")) cmd.Flags().Uint64(MediaEmojiLocalMaxSizeFlag(), uint64(cfg.MediaEmojiLocalMaxSize), fieldtag("MediaEmojiLocalMaxSize", "usage")) cmd.Flags().Uint64(MediaEmojiRemoteMaxSizeFlag(), uint64(cfg.MediaEmojiRemoteMaxSize), fieldtag("MediaEmojiRemoteMaxSize", "usage")) + cmd.Flags().String(MediaCleanupFromFlag(), cfg.MediaCleanupFrom, fieldtag("MediaCleanupFrom", "usage")) + cmd.Flags().Duration(MediaCleanupEveryFlag(), cfg.MediaCleanupEvery, fieldtag("MediaCleanupEvery", "usage")) // Storage cmd.Flags().String(StorageBackendFlag(), cfg.StorageBackend, fieldtag("StorageBackend", "usage")) diff --git a/internal/config/helpers.gen.go b/internal/config/helpers.gen.go index 80687eb66..415035bea 100644 --- a/internal/config/helpers.gen.go +++ b/internal/config/helpers.gen.go @@ -1224,6 +1224,56 @@ func GetMediaEmojiRemoteMaxSize() bytesize.Size { return global.GetMediaEmojiRem // SetMediaEmojiRemoteMaxSize safely sets the value for global configuration 'MediaEmojiRemoteMaxSize' field func SetMediaEmojiRemoteMaxSize(v bytesize.Size) { global.SetMediaEmojiRemoteMaxSize(v) } +// GetMediaCleanupFrom safely fetches the Configuration value for state's 'MediaCleanupFrom' field +func (st *ConfigState) GetMediaCleanupFrom() (v string) { + st.mutex.RLock() + v = st.config.MediaCleanupFrom + st.mutex.RUnlock() + return +} + +// SetMediaCleanupFrom safely sets the Configuration value for state's 'MediaCleanupFrom' field +func (st *ConfigState) SetMediaCleanupFrom(v string) { + st.mutex.Lock() + defer st.mutex.Unlock() + st.config.MediaCleanupFrom = v + st.reloadToViper() +} 
+ +// MediaCleanupFromFlag returns the flag name for the 'MediaCleanupFrom' field +func MediaCleanupFromFlag() string { return "media-cleanup-from" } + +// GetMediaCleanupFrom safely fetches the value for global configuration 'MediaCleanupFrom' field +func GetMediaCleanupFrom() string { return global.GetMediaCleanupFrom() } + +// SetMediaCleanupFrom safely sets the value for global configuration 'MediaCleanupFrom' field +func SetMediaCleanupFrom(v string) { global.SetMediaCleanupFrom(v) } + +// GetMediaCleanupEvery safely fetches the Configuration value for state's 'MediaCleanupEvery' field +func (st *ConfigState) GetMediaCleanupEvery() (v time.Duration) { + st.mutex.RLock() + v = st.config.MediaCleanupEvery + st.mutex.RUnlock() + return +} + +// SetMediaCleanupEvery safely sets the Configuration value for state's 'MediaCleanupEvery' field +func (st *ConfigState) SetMediaCleanupEvery(v time.Duration) { + st.mutex.Lock() + defer st.mutex.Unlock() + st.config.MediaCleanupEvery = v + st.reloadToViper() +} + +// MediaCleanupEveryFlag returns the flag name for the 'MediaCleanupEvery' field +func MediaCleanupEveryFlag() string { return "media-cleanup-every" } + +// GetMediaCleanupEvery safely fetches the value for global configuration 'MediaCleanupEvery' field +func GetMediaCleanupEvery() time.Duration { return global.GetMediaCleanupEvery() } + +// SetMediaCleanupEvery safely sets the value for global configuration 'MediaCleanupEvery' field +func SetMediaCleanupEvery(v time.Duration) { global.SetMediaCleanupEvery(v) } + // GetStorageBackend safely fetches the Configuration value for state's 'StorageBackend' field func (st *ConfigState) GetStorageBackend() (v string) { st.mutex.RLock() diff --git a/internal/gotosocial/gotosocial.go b/internal/gotosocial/gotosocial.go index 89d11c579..74cc09f65 100644 --- a/internal/gotosocial/gotosocial.go +++ b/internal/gotosocial/gotosocial.go @@ -20,62 +20,47 @@ package gotosocial import ( "context" + 
"github.com/superseriousbusiness/gotosocial/internal/cleaner" "github.com/superseriousbusiness/gotosocial/internal/db" - "github.com/superseriousbusiness/gotosocial/internal/federation" - "github.com/superseriousbusiness/gotosocial/internal/media" "github.com/superseriousbusiness/gotosocial/internal/router" ) -// Server is the 'main' function of the gotosocial server, and the place where everything hangs together. -// The logic of stopping and starting the entire server is contained here. -type Server interface { - // Start starts up the gotosocial server. If something goes wrong - // while starting the server, then an error will be returned. - Start(context.Context) error - // Stop closes down the gotosocial server, first closing the router - // then the database. If something goes wrong while stopping, an - // error will be returned. - Stop(context.Context) error +// Server represents a long-running +// GoToSocial server instance. +type Server struct { + db db.DB + apiRouter router.Router + cleaner *cleaner.Cleaner } -// NewServer returns a new gotosocial server, initialized with the given configuration. -// An error will be returned the caller if something goes wrong during initialization -// eg., no db or storage connection, port for router already in use, etc. +// NewServer returns a new +// GoToSocial server instance. func NewServer( db db.DB, apiRouter router.Router, - federator *federation.Federator, - mediaManager *media.Manager, -) (Server, error) { - return &gotosocial{ - db: db, - apiRouter: apiRouter, - federator: federator, - mediaManager: mediaManager, - }, nil + cleaner *cleaner.Cleaner, +) *Server { + return &Server{ + db: db, + apiRouter: apiRouter, + cleaner: cleaner, + } } -// gotosocial fulfils the gotosocial interface. -type gotosocial struct { - db db.DB - apiRouter router.Router - federator *federation.Federator - mediaManager *media.Manager +// Start starts up the GoToSocial server by starting the router, +// then the cleaner. 
If something goes wrong while starting the +// server, then an error will be returned. +func (s *Server) Start(ctx context.Context) error { + s.apiRouter.Start() + return s.cleaner.ScheduleJobs() } -// Start starts up the gotosocial server. If something goes wrong -// while starting the server, then an error will be returned. -func (gts *gotosocial) Start(ctx context.Context) error { - gts.apiRouter.Start() - return nil -} - -// Stop closes down the gotosocial server, first closing the router, -// then the media manager, then the database. -// If something goes wrong while stopping, an error will be returned. -func (gts *gotosocial) Stop(ctx context.Context) error { - if err := gts.apiRouter.Stop(ctx); err != nil { +// Stop closes down the GoToSocial server, first closing the router, +// then the database. If something goes wrong while stopping, an +// error will be returned. +func (s *Server) Stop(ctx context.Context) error { + if err := s.apiRouter.Stop(ctx); err != nil { return err } - return gts.db.Close() + return s.db.Close() } diff --git a/internal/processing/admin/admin.go b/internal/processing/admin/admin.go index 51429c11c..3093b3e36 100644 --- a/internal/processing/admin/admin.go +++ b/internal/processing/admin/admin.go @@ -45,10 +45,17 @@ func (p *Processor) Actions() *Actions { } // New returns a new admin processor.
-func New(state *state.State, converter *typeutils.Converter, mediaManager *media.Manager, transportController transport.Controller, emailSender email.Sender) Processor { +func New( + state *state.State, + cleaner *cleaner.Cleaner, + converter *typeutils.Converter, + mediaManager *media.Manager, + transportController transport.Controller, + emailSender email.Sender, +) Processor { return Processor{ state: state, - cleaner: cleaner.New(state), + cleaner: cleaner, converter: converter, mediaManager: mediaManager, transportController: transportController, diff --git a/internal/processing/admin/admin_test.go b/internal/processing/admin/admin_test.go index a5a790763..614735ee1 100644 --- a/internal/processing/admin/admin_test.go +++ b/internal/processing/admin/admin_test.go @@ -19,6 +19,7 @@ package admin_test import ( "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/cleaner" "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/email" "github.com/superseriousbusiness/gotosocial/internal/federation" @@ -105,6 +106,7 @@ func (suite *AdminStandardTestSuite) SetupTest() { suite.emailSender = testrig.NewEmailSender("../../../web/template/", suite.sentEmails) suite.processor = processing.NewProcessor( + cleaner.New(&suite.state), suite.tc, suite.federator, suite.oauthServer, diff --git a/internal/processing/processor.go b/internal/processing/processor.go index 47f14a686..b571ff499 100644 --- a/internal/processing/processor.go +++ b/internal/processing/processor.go @@ -18,6 +18,7 @@ package processing import ( + "github.com/superseriousbusiness/gotosocial/internal/cleaner" "github.com/superseriousbusiness/gotosocial/internal/email" "github.com/superseriousbusiness/gotosocial/internal/federation" mm "github.com/superseriousbusiness/gotosocial/internal/media" @@ -126,6 +127,7 @@ func (p *Processor) Workers() *workers.Processor { // NewProcessor returns a new Processor. 
func NewProcessor( + cleaner *cleaner.Cleaner, converter *typeutils.Converter, federator *federation.Federator, oauthServer oauth.Server, @@ -156,7 +158,7 @@ func NewProcessor( // Instantiate the rest of the sub // processors + pin them to this struct. processor.account = accountProcessor - processor.admin = admin.New(state, converter, mediaManager, federator.TransportController(), emailSender) + processor.admin = admin.New(state, cleaner, converter, mediaManager, federator.TransportController(), emailSender) processor.fedi = fedi.New(state, converter, federator, filter) processor.list = list.New(state, converter) processor.markers = markers.New(state, converter) diff --git a/internal/processing/processor_test.go b/internal/processing/processor_test.go index 63d2c31fe..2e0baae96 100644 --- a/internal/processing/processor_test.go +++ b/internal/processing/processor_test.go @@ -21,6 +21,7 @@ import ( "context" "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/cleaner" "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/email" "github.com/superseriousbusiness/gotosocial/internal/federation" @@ -122,7 +123,7 @@ func (suite *ProcessingStandardTestSuite) SetupTest() { suite.oauthServer = testrig.NewTestOauthServer(suite.db) suite.emailSender = testrig.NewEmailSender("../../web/template/", nil) - suite.processor = processing.NewProcessor(suite.typeconverter, suite.federator, suite.oauthServer, suite.mediaManager, &suite.state, suite.emailSender) + suite.processor = processing.NewProcessor(cleaner.New(&suite.state), suite.typeconverter, suite.federator, suite.oauthServer, suite.mediaManager, &suite.state, suite.emailSender) suite.state.Workers.EnqueueClientAPI = suite.processor.Workers().EnqueueClientAPI suite.state.Workers.EnqueueFediAPI = suite.processor.Workers().EnqueueFediAPI diff --git a/internal/processing/workers/workers_test.go 
b/internal/processing/workers/workers_test.go index 5712180f5..c97e9eeb8 100644 --- a/internal/processing/workers/workers_test.go +++ b/internal/processing/workers/workers_test.go @@ -21,6 +21,7 @@ import ( "context" "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/cleaner" "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/email" "github.com/superseriousbusiness/gotosocial/internal/federation" @@ -124,7 +125,7 @@ func (suite *WorkersTestSuite) SetupTest() { suite.oauthServer = testrig.NewTestOauthServer(suite.db) suite.emailSender = testrig.NewEmailSender("../../../web/template/", nil) - suite.processor = processing.NewProcessor(suite.typeconverter, suite.federator, suite.oauthServer, suite.mediaManager, &suite.state, suite.emailSender) + suite.processor = processing.NewProcessor(cleaner.New(&suite.state), suite.typeconverter, suite.federator, suite.oauthServer, suite.mediaManager, &suite.state, suite.emailSender) suite.state.Workers.EnqueueClientAPI = suite.processor.Workers().EnqueueClientAPI suite.state.Workers.EnqueueFediAPI = suite.processor.Workers().EnqueueFediAPI diff --git a/mkdocs.yml b/mkdocs.yml index 189f01a7f..5d936c912 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -106,6 +106,7 @@ nav: - "admin/domain_blocks.md" - "admin/cli.md" - "admin/backup_and_restore.md" + - "admin/media_caching.md" - "Federation": - "federation/index.md" - "federation/glossary.md" diff --git a/test/envparsing.sh b/test/envparsing.sh index 34d00dd80..f4bac4ac8 100755 --- a/test/envparsing.sh +++ b/test/envparsing.sh @@ -94,6 +94,8 @@ EXPECT=$(cat << "EOF" "log-db-queries": true, "log-level": "info", "log-timestamp-format": "banana", + "media-cleanup-every": 86400000000000, + "media-cleanup-from": "00:00", "media-description-max-chars": 5000, "media-description-min-chars": 69, "media-emoji-local-max-size": 420, diff --git a/testrig/config.go b/testrig/config.go index 
154e61f47..0a322484c 100644 --- a/testrig/config.go +++ b/testrig/config.go @@ -80,8 +80,10 @@ var testDefaults = config.Configuration{ MediaDescriptionMinChars: 0, MediaDescriptionMaxChars: 500, MediaRemoteCacheDays: 7, - MediaEmojiLocalMaxSize: 51200, // 50kb - MediaEmojiRemoteMaxSize: 102400, // 100kb + MediaEmojiLocalMaxSize: 51200, // 50kb + MediaEmojiRemoteMaxSize: 102400, // 100kb + MediaCleanupFrom: "00:00", // midnight. + MediaCleanupEvery: 24 * time.Hour, // 1/day. // the testrig only uses in-memory storage, so we can // safely set this value to 'test' to avoid running storage diff --git a/testrig/processor.go b/testrig/processor.go index 4d4ba3d53..137934c5e 100644 --- a/testrig/processor.go +++ b/testrig/processor.go @@ -18,6 +18,7 @@ package testrig import ( + "github.com/superseriousbusiness/gotosocial/internal/cleaner" "github.com/superseriousbusiness/gotosocial/internal/email" "github.com/superseriousbusiness/gotosocial/internal/federation" "github.com/superseriousbusiness/gotosocial/internal/media" @@ -28,7 +29,7 @@ import ( // NewTestProcessor returns a Processor suitable for testing purposes func NewTestProcessor(state *state.State, federator *federation.Federator, emailSender email.Sender, mediaManager *media.Manager) *processing.Processor { - p := processing.NewProcessor(typeutils.NewConverter(state), federator, NewTestOauthServer(state.DB), mediaManager, state, emailSender) + p := processing.NewProcessor(cleaner.New(state), typeutils.NewConverter(state), federator, NewTestOauthServer(state.DB), mediaManager, state, emailSender) state.Workers.EnqueueClientAPI = p.Workers().EnqueueClientAPI state.Workers.EnqueueFediAPI = p.Workers().EnqueueFediAPI return p