[chore/performance] Update media prune logic, add extra CLI command (#1474)

* start updating media prune stuff a wee bit

* continue prune / uncache work

* more tidying + consistency stuff

* add prune CLI command

* docs

* arg
This commit is contained in:
tobi 2023-02-11 12:48:38 +01:00 committed by GitHub
parent 70739d32cc
commit 40bc03e717
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
31 changed files with 1113 additions and 1090 deletions

View File

@ -0,0 +1,78 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package prune
import (
"context"
"fmt"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/db/bundb"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/state"
gtsstorage "github.com/superseriousbusiness/gotosocial/internal/storage"
)
type prune struct {
dbService db.DB
storage *gtsstorage.Driver
manager media.Manager
}
func setupPrune(ctx context.Context) (*prune, error) {
var state state.State
state.Caches.Init()
dbService, err := bundb.NewBunDBService(ctx, &state)
if err != nil {
return nil, fmt.Errorf("error creating dbservice: %w", err)
}
storage, err := gtsstorage.AutoConfig() //nolint:contextcheck
if err != nil {
return nil, fmt.Errorf("error creating storage backend: %w", err)
}
manager, err := media.NewManager(dbService, storage) //nolint:contextcheck
if err != nil {
return nil, fmt.Errorf("error instantiating mediamanager: %w", err)
}
return &prune{
dbService: dbService,
storage: storage,
manager: manager,
}, nil
}
func (p *prune) shutdown(ctx context.Context) error {
if err := p.storage.Close(); err != nil {
return fmt.Errorf("error closing storage backend: %w", err)
}
if err := p.dbService.Stop(ctx); err != nil {
return fmt.Errorf("error closing dbservice: %w", err)
}
if err := p.manager.Stop(); err != nil {
return fmt.Errorf("error closing media manager: %w", err)
}
return nil
}

View File

@ -24,53 +24,28 @@ import (
"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/db/bundb"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/state"
gtsstorage "github.com/superseriousbusiness/gotosocial/internal/storage"
)
// Orphaned prunes orphaned media from storage.
var Orphaned action.GTSAction = func(ctx context.Context) error {
var state state.State
state.Caches.Init()
dbService, err := bundb.NewBunDBService(ctx, &state)
prune, err := setupPrune(ctx)
if err != nil {
return fmt.Errorf("error creating dbservice: %s", err)
}
storage, err := gtsstorage.AutoConfig()
if err != nil {
return fmt.Errorf("error creating storage backend: %w", err)
}
manager, err := media.NewManager(dbService, storage)
if err != nil {
return fmt.Errorf("error instantiating mediamanager: %s", err)
return err
}
dry := config.GetAdminMediaPruneDryRun()
pruned, err := manager.PruneOrphaned(ctx, dry)
pruned, err := prune.manager.PruneOrphaned(ctx, dry)
if err != nil {
return fmt.Errorf("error pruning: %s", err)
}
if dry /* dick heyyoooooo */ {
log.Infof("DRY RUN: %d stored items are orphaned and eligible to be pruned", pruned)
log.Infof("DRY RUN: %d items are orphaned and eligible to be pruned", pruned)
} else {
log.Infof("%d stored items were orphaned and pruned", pruned)
log.Infof("%d orphaned items were pruned", pruned)
}
if err := storage.Close(); err != nil {
return fmt.Errorf("error closing storage backend: %w", err)
}
if err := dbService.Stop(ctx); err != nil {
return fmt.Errorf("error closing dbservice: %s", err)
}
return nil
return prune.shutdown(ctx)
}

View File

@ -0,0 +1,58 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package prune
import (
"context"
"fmt"
"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
// Remote prunes old and/or unused remote media.
var Remote action.GTSAction = func(ctx context.Context) error {
prune, err := setupPrune(ctx)
if err != nil {
return err
}
dry := config.GetAdminMediaPruneDryRun()
pruned, err := prune.manager.PruneUnusedRemote(ctx, dry)
if err != nil {
return fmt.Errorf("error pruning: %w", err)
}
uncached, err := prune.manager.UncacheRemote(ctx, config.GetMediaRemoteCacheDays(), dry)
if err != nil {
return fmt.Errorf("error pruning: %w", err)
}
total := pruned + uncached
if dry /* dick heyyoooooo */ {
log.Infof("DRY RUN: %d remote items are unused/stale and eligible to be pruned", total)
} else {
log.Infof("%d unused/stale remote items were pruned", pruned)
}
return prune.shutdown(ctx)
}

View File

@ -231,11 +231,6 @@ var Start action.GTSAction = func(ctx context.Context) error {
return fmt.Errorf("error starting gotosocial service: %s", err)
}
// perform initial media prune in case value of MediaRemoteCacheDays changed
if err := processor.AdminMediaPrune(ctx, config.GetMediaRemoteCacheDays()); err != nil {
return fmt.Errorf("error during initial media prune: %s", err)
}
// catch shutdown signals from the operating system
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)

View File

@ -169,7 +169,6 @@ func adminCommands() *cobra.Command {
Use: "prune",
Short: "admin commands for pruning unused/orphaned media from storage",
}
config.AddAdminMediaPrune(adminMediaPruneCmd)
adminMediaPruneOrphanedCmd := &cobra.Command{
Use: "orphaned",
@ -184,6 +183,19 @@ func adminCommands() *cobra.Command {
config.AddAdminMediaPrune(adminMediaPruneOrphanedCmd)
adminMediaPruneCmd.AddCommand(adminMediaPruneOrphanedCmd)
adminMediaPruneRemoteCmd := &cobra.Command{
Use: "remote",
Short: "prune unused/stale remote media from storage, older than given number of days",
PreRunE: func(cmd *cobra.Command, args []string) error {
return preRun(preRunArgs{cmd: cmd})
},
RunE: func(cmd *cobra.Command, args []string) error {
return run(cmd.Context(), prune.Remote)
},
}
config.AddAdminMediaPrune(adminMediaPruneRemoteCmd)
adminMediaPruneCmd.AddCommand(adminMediaPruneRemoteCmd)
adminMediaCmd.AddCommand(adminMediaPruneCmd)
adminCmd.AddCommand(adminMediaCmd)

View File

@ -287,3 +287,40 @@ Example (for real):
```bash
gotosocial admin media prune orphaned --dry-run=false
```
### gotosocial admin media prune remote
This command can be used to prune unused/stale remote media from your GoToSocial.
Stale media means avatars/headers/status attachments from remote instances that are older than `media-remote-cache-days`.
These items will be refetched later on demand, if necessary.
Unused media means avatars/headers/status attachments which are not currently in use by an account or status.
**This command only works when GoToSocial is not running, since it acquires an exclusive lock on storage. Stop GoToSocial first before running this command!**
```text
prune unused/stale remote media from storage, older than given number of days
Usage:
gotosocial admin media prune remote [flags]
Flags:
--dry-run perform a dry run and only log number of items eligible for pruning (default true)
-h, --help help for remote
```
By default, this command performs a dry run, which will log how many items can be pruned. To do it for real, add `--dry-run=false` to the command.
Example (dry run):
```bash
gotosocial admin media prune remote
```
Example (for real):
```bash
gotosocial admin media prune remote --dry-run=false
```

View File

@ -3608,7 +3608,7 @@ paths:
- application/json
- application/xml
- application/x-www-form-urlencoded
description: Also cleans up unused headers + avatars from the media cache.
description: Also cleans up unused headers + avatars from the media cache and prunes orphaned items from storage.
operationId: mediaCleanup
parameters:
- description: |-

View File

@ -33,7 +33,8 @@ import (
// MediaCleanupPOSTHandler swagger:operation POST /api/v1/admin/media_cleanup mediaCleanup
//
// Clean up remote media older than the specified number of days.
// Also cleans up unused headers + avatars from the media cache.
//
// Also cleans up unused headers + avatars from the media cache and prunes orphaned items from storage.
//
// ---
// tags:

View File

@ -161,4 +161,6 @@ var Defaults = Configuration{
UserSweepFreq: time.Second * 10,
},
},
AdminMediaPruneDryRun: true,
}

View File

@ -72,6 +72,23 @@ func (m *mediaDB) GetRemoteOlderThan(ctx context.Context, olderThan time.Time, l
return attachments, nil
}
func (m *mediaDB) CountRemoteOlderThan(ctx context.Context, olderThan time.Time) (int, db.Error) {
q := m.conn.
NewSelect().
TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")).
Column("media_attachment.id").
Where("? = ?", bun.Ident("media_attachment.cached"), true).
Where("? < ?", bun.Ident("media_attachment.created_at"), olderThan).
WhereGroup(" AND ", whereNotEmptyAndNotNull("media_attachment.remote_url"))
count, err := q.Count(ctx)
if err != nil {
return 0, m.conn.ProcessError(err)
}
return count, nil
}
func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
attachments := []*gtsmodel.MediaAttachment{}
@ -98,7 +115,7 @@ func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit
return attachments, nil
}
func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
attachments := []*gtsmodel.MediaAttachment{}
q := m.newMediaQ(&attachments).
@ -107,11 +124,8 @@ func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan tim
Where("? = ?", bun.Ident("media_attachment.header"), false).
Where("? < ?", bun.Ident("media_attachment.created_at"), olderThan).
Where("? IS NULL", bun.Ident("media_attachment.remote_url")).
Where("? IS NULL", bun.Ident("media_attachment.status_id"))
if maxID != "" {
q = q.Where("? < ?", bun.Ident("media_attachment.id"), maxID)
}
Where("? IS NULL", bun.Ident("media_attachment.status_id")).
Order("media_attachment.created_at DESC")
if limit != 0 {
q = q.Limit(limit)
@ -123,3 +137,23 @@ func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan tim
return attachments, nil
}
func (m *mediaDB) CountLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time) (int, db.Error) {
q := m.conn.
NewSelect().
TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")).
Column("media_attachment.id").
Where("? = ?", bun.Ident("media_attachment.cached"), true).
Where("? = ?", bun.Ident("media_attachment.avatar"), false).
Where("? = ?", bun.Ident("media_attachment.header"), false).
Where("? < ?", bun.Ident("media_attachment.created_at"), olderThan).
Where("? IS NULL", bun.Ident("media_attachment.remote_url")).
Where("? IS NULL", bun.Ident("media_attachment.status_id"))
count, err := q.Count(ctx)
if err != nil {
return 0, m.conn.ProcessError(err)
}
return count, nil
}

View File

@ -55,7 +55,7 @@ func (suite *MediaTestSuite) TestGetAvisAndHeaders() {
func (suite *MediaTestSuite) TestGetLocalUnattachedOlderThan() {
ctx := context.Background()
attachments, err := suite.db.GetLocalUnattachedOlderThan(ctx, testrig.TimeMustParse("2090-06-04T13:12:00Z"), "", 10)
attachments, err := suite.db.GetLocalUnattachedOlderThan(ctx, testrig.TimeMustParse("2090-06-04T13:12:00Z"), 10)
suite.NoError(err)
suite.Len(attachments, 1)
}

View File

@ -37,6 +37,11 @@ type Media interface {
// In other words, media attachments that originated remotely, and that we currently have cached locally.
GetRemoteOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, Error)
// CountRemoteOlderThan is like GetRemoteOlderThan, except instead of getting limit n attachments,
// it just counts how many remote attachments in the database (including avatars and headers) meet
// the olderThan criteria.
CountRemoteOlderThan(ctx context.Context, olderThan time.Time) (int, Error)
// GetAvatarsAndHeaders fetches limit n avatars and headers with an id < maxID. These headers
// and avis may be in use or not; the caller should check this if it's important.
GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error)
@ -44,5 +49,11 @@ type Media interface {
// GetLocalUnattachedOlderThan fetches limit n local media attachments (including avatars and headers), older than
// the given time, which aren't header or avatars, and aren't attached to a status. In other words, attachments which were
// uploaded but never used for whatever reason, or attachments that were attached to a status which was subsequently deleted.
GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error)
//
// These will be returned in order of attachment.created_at descending (newest to oldest in other words).
GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, Error)
// CountLocalUnattachedOlderThan is like GetLocalUnattachedOlderThan, except instead of getting limit n attachments,
// it just counts how many local attachments in the database meet the olderThan criteria.
CountLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time) (int, Error)
}

73
internal/media/cron.go Normal file
View File

@ -0,0 +1,73 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media
import (
"context"
"fmt"
"time"
"github.com/robfig/cron/v3"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
type cronLogger struct{}
func (l *cronLogger) Info(msg string, keysAndValues ...interface{}) {
log.Info("media manager cron logger: ", msg, keysAndValues)
}
func (l *cronLogger) Error(err error, msg string, keysAndValues ...interface{}) {
log.Error("media manager cron logger: ", err, msg, keysAndValues)
}
func scheduleCleanup(m *manager) error {
pruneCtx, pruneCancel := context.WithCancel(context.Background())
c := cron.New(cron.WithLogger(new(cronLogger)))
defer c.Start()
if _, err := c.AddFunc("@midnight", func() {
if err := m.PruneAll(pruneCtx, config.GetMediaRemoteCacheDays(), true); err != nil {
log.Error(err)
return
}
}); err != nil {
pruneCancel()
return fmt.Errorf("error starting media manager cleanup job: %s", err)
}
m.stopCronJobs = func() error {
// Try to stop jobs gracefully by waiting til they're finished.
stopCtx := c.Stop()
select {
case <-stopCtx.Done():
log.Infof("media manager: cron finished jobs and stopped gracefully")
case <-time.After(1 * time.Minute):
log.Warnf("media manager: cron didn't stop after 60 seconds, force closing jobs")
pruneCancel()
}
return nil
}
return nil
}

View File

@ -21,22 +21,24 @@ package media
import (
"context"
"fmt"
"time"
"github.com/robfig/cron/v3"
"github.com/superseriousbusiness/gotosocial/internal/concurrency"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/storage"
)
// selectPruneLimit is the amount of media entries to select at a time from the db when pruning
const selectPruneLimit = 20
var SupportedMIMETypes = []string{
mimeImageJpeg,
mimeImageGif,
mimeImagePng,
mimeImageWebp,
mimeVideoMp4,
}
// UnusedLocalAttachmentCacheDays is the amount of days to keep local media in storage if it
// is not attached to a status, or was never attached to a status.
const UnusedLocalAttachmentCacheDays = 3
var SupportedEmojiMIMETypes = []string{
mimeImageGif,
mimeImagePng,
}
// Manager provides an interface for managing media: parsing, storing, and retrieving media objects like photos, videos, and gifs.
type Manager interface {
@ -85,25 +87,36 @@ type Manager interface {
RecacheMedia(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, attachmentID string) (*ProcessingMedia, error)
/*
PRUNING FUNCTIONS
PRUNING/UNCACHING FUNCTIONS
*/
// PruneAllRemote prunes all remote media attachments cached on this instance which are older than the given amount of days.
// 'Pruning' in this context means removing the locally stored data of the attachment (both thumbnail and full size),
// and setting 'cached' to false on the associated attachment.
// PruneAll runs all of the below pruning/uncacheing functions, and then cleans up any resulting
// empty directories from the storage driver. It can be called as a shortcut for calling the below
// pruning functions one by one.
//
// If blocking is true, then any errors encountered during the prune will be combined + returned to
// the caller. If blocking is false, the prune is run in the background and errors are just logged
// instead.
PruneAll(ctx context.Context, mediaCacheRemoteDays int, blocking bool) error
// UncacheRemote uncaches all remote media attachments older than the given amount of days.
//
// In this context, uncacheing means deleting media files from storage and marking the attachment
// as cached=false in the database.
//
// If 'dry' is true, then only a dry run will be performed: nothing will actually be changed.
//
// The returned int is the amount of media that was/would be uncached by this function.
UncacheRemote(ctx context.Context, olderThanDays int, dry bool) (int, error)
// PruneUnusedRemote prunes unused/out of date headers and avatars cached on this instance.
//
// The returned int is the amount of media that was pruned by this function.
PruneAllRemote(ctx context.Context, olderThanDays int) (int, error)
// PruneAllMeta prunes unused/out of date headers and avatars cached on this instance.
//
// The returned int is the amount of media that was pruned by this function.
PruneAllMeta(ctx context.Context) (int, error)
// PruneUnusedLocalAttachments prunes unused media attachments that were uploaded by
PruneUnusedRemote(ctx context.Context, dry bool) (int, error)
// PruneUnusedLocal prunes unused media attachments that were uploaded by
// a user on this instance, but never actually attached to a status, or attached but
// later detached.
//
// The returned int is the amount of media that was pruned by this function.
PruneUnusedLocalAttachments(ctx context.Context) (int, error)
PruneUnusedLocal(ctx context.Context, dry bool) (int, error)
// PruneOrphaned prunes files that exist in storage but which do not have a corresponding
// entry in the database.
//
@ -145,7 +158,7 @@ func NewManager(database db.DB, storage *storage.Driver) (Manager, error) {
storage: storage,
}
// Prepare the media worker pool
// Prepare the media worker pool.
m.mediaWorker = concurrency.NewWorkerPool[*ProcessingMedia](-1, 10)
m.mediaWorker.SetProcessor(func(ctx context.Context, media *ProcessingMedia) error {
if _, err := media.LoadAttachment(ctx); err != nil {
@ -154,7 +167,7 @@ func NewManager(database db.DB, storage *storage.Driver) (Manager, error) {
return nil
})
// Prepare the emoji worker pool
// Prepare the emoji worker pool.
m.emojiWorker = concurrency.NewWorkerPool[*ProcessingEmoji](-1, 10)
m.emojiWorker.SetProcessor(func(ctx context.Context, emoji *ProcessingEmoji) error {
if _, err := emoji.LoadEmoji(ctx); err != nil {
@ -163,7 +176,7 @@ func NewManager(database db.DB, storage *storage.Driver) (Manager, error) {
return nil
})
// Start the worker pools
// Start the worker pools.
if err := m.mediaWorker.Start(); err != nil {
return nil, err
}
@ -171,7 +184,8 @@ func NewManager(database db.DB, storage *storage.Driver) (Manager, error) {
return nil, err
}
if err := scheduleCleanupJobs(m); err != nil {
// Schedule cron job(s) for clean up.
if err := scheduleCleanup(m); err != nil {
return nil, err
}
@ -206,7 +220,7 @@ func (m *manager) RecacheMedia(ctx context.Context, data DataFunc, postData Post
}
func (m *manager) Stop() error {
// Stop media and emoji worker pools
// Stop worker pools.
mediaErr := m.mediaWorker.Stop()
emojiErr := m.emojiWorker.Stop()
@ -223,70 +237,3 @@ func (m *manager) Stop() error {
return cronErr
}
func scheduleCleanupJobs(m *manager) error {
// create a new cron instance for scheduling cleanup jobs
c := cron.New(cron.WithLogger(&logrusWrapper{}))
pruneCtx, pruneCancel := context.WithCancel(context.Background())
if _, err := c.AddFunc("@midnight", func() {
begin := time.Now()
pruned, err := m.PruneAllMeta(pruneCtx)
if err != nil {
log.Errorf("media manager: error pruning meta: %s", err)
return
}
log.Infof("media manager: pruned %d meta entries in %s", pruned, time.Since(begin))
}); err != nil {
pruneCancel()
return fmt.Errorf("error starting media manager meta cleanup job: %s", err)
}
if _, err := c.AddFunc("@midnight", func() {
begin := time.Now()
pruned, err := m.PruneUnusedLocalAttachments(pruneCtx)
if err != nil {
log.Errorf("media manager: error pruning unused local attachments: %s", err)
return
}
log.Infof("media manager: pruned %d unused local attachments in %s", pruned, time.Since(begin))
}); err != nil {
pruneCancel()
return fmt.Errorf("error starting media manager unused local attachments cleanup job: %s", err)
}
// start remote cache cleanup cronjob if configured
if mediaRemoteCacheDays := config.GetMediaRemoteCacheDays(); mediaRemoteCacheDays > 0 {
if _, err := c.AddFunc("@midnight", func() {
begin := time.Now()
pruned, err := m.PruneAllRemote(pruneCtx, mediaRemoteCacheDays)
if err != nil {
log.Errorf("media manager: error pruning remote cache: %s", err)
return
}
log.Infof("media manager: pruned %d remote cache entries in %s", pruned, time.Since(begin))
}); err != nil {
pruneCancel()
return fmt.Errorf("error starting media manager remote cache cleanup job: %s", err)
}
}
// try to stop any jobs gracefully by waiting til they're finished
m.stopCronJobs = func() error {
cronCtx := c.Stop()
select {
case <-cronCtx.Done():
log.Infof("media manager: cron finished jobs and stopped gracefully")
case <-time.After(1 * time.Minute):
log.Infof("media manager: cron didn't stop after 60 seconds, will force close jobs")
break
}
pruneCancel()
return nil
}
c.Start()
return nil
}

View File

@ -82,10 +82,10 @@ import (
// type (the first of four ASCII letters) is lower-case.
const chunkTypeAncillaryBit = 0x20000000
// PNGAncillaryChunkStripper wraps another io.Reader to strip ancillary chunks,
// pngAncillaryChunkStripper wraps another io.Reader to strip ancillary chunks,
// if the data is in the PNG file format. If the data isn't PNG, it is passed
// through unmodified.
type PNGAncillaryChunkStripper struct {
type pngAncillaryChunkStripper struct {
// Reader is the wrapped io.Reader.
Reader io.Reader
@ -113,7 +113,7 @@ type PNGAncillaryChunkStripper struct {
}
// Read implements io.Reader.
func (r *PNGAncillaryChunkStripper) Read(p []byte) (int, error) {
func (r *pngAncillaryChunkStripper) Read(p []byte) (int, error) {
for {
// If the wrapped io.Reader returned a non-nil error, drain r.buffer
// (what data we have) and return that error (if fully drained).

View File

@ -239,7 +239,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error {
// .png image (requires ancillary chunk stripping)
case mimeImagePng:
fullImg, err = decodeImage(&PNGAncillaryChunkStripper{
fullImg, err = decodeImage(&pngAncillaryChunkStripper{
Reader: rc,
}, imaging.AutoOrientation(true))
if err != nil {

353
internal/media/prune.go Normal file
View File

@ -0,0 +1,353 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media
import (
"context"
"errors"
"fmt"
"time"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/uris"
)
const (
selectPruneLimit = 50 // Amount of media entries to select at a time from the db when pruning.
unusedLocalAttachmentDays = 3 // Number of days to keep local media in storage if not attached to a status.
)
func (m *manager) PruneAll(ctx context.Context, mediaCacheRemoteDays int, blocking bool) error {
const dry = false
f := func(innerCtx context.Context) error {
errs := gtserror.MultiError{}
pruned, err := m.PruneUnusedLocal(innerCtx, dry)
if err != nil {
errs = append(errs, fmt.Sprintf("error pruning unused local media (%s)", err))
} else {
log.Infof("pruned %d unused local media", pruned)
}
pruned, err = m.PruneUnusedRemote(innerCtx, dry)
if err != nil {
errs = append(errs, fmt.Sprintf("error pruning unused remote media: (%s)", err))
} else {
log.Infof("pruned %d unused remote media", pruned)
}
pruned, err = m.UncacheRemote(innerCtx, mediaCacheRemoteDays, dry)
if err != nil {
errs = append(errs, fmt.Sprintf("error uncacheing remote media older than %d day(s): (%s)", mediaCacheRemoteDays, err))
} else {
log.Infof("uncached %d remote media older than %d day(s)", pruned, mediaCacheRemoteDays)
}
pruned, err = m.PruneOrphaned(innerCtx, dry)
if err != nil {
errs = append(errs, fmt.Sprintf("error pruning orphaned media: (%s)", err))
} else {
log.Infof("pruned %d orphaned media", pruned)
}
if err := m.storage.Storage.Clean(innerCtx); err != nil {
errs = append(errs, fmt.Sprintf("error cleaning storage: (%s)", err))
} else {
log.Info("cleaned storage")
}
return errs.Combine()
}
if blocking {
return f(ctx)
}
go func() {
if err := f(context.Background()); err != nil {
log.Error(err)
}
}()
return nil
}
func (m *manager) PruneUnusedRemote(ctx context.Context, dry bool) (int, error) {
var (
totalPruned int
maxID string
attachments []*gtsmodel.MediaAttachment
err error
)
// We don't know in advance how many remote attachments will meet
// our criteria for being 'unused'. So a dry run in this case just
// means we iterate through as normal, but do nothing with each entry
// instead of removing it. Define this here so we don't do the 'if dry'
// check inside the loop a million times.
var f func(ctx context.Context, attachment *gtsmodel.MediaAttachment) error
if !dry {
f = m.deleteAttachment
} else {
f = func(_ context.Context, _ *gtsmodel.MediaAttachment) error {
return nil // noop
}
}
for attachments, err = m.db.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.db.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit) {
maxID = attachments[len(attachments)-1].ID // use the id of the last attachment in the slice as the next 'maxID' value
// Prune each attachment that meets one of the following criteria:
// - Has no owning account in the database.
// - Is a header but isn't the owning account's current header.
// - Is an avatar but isn't the owning account's current avatar.
for _, attachment := range attachments {
if attachment.Account == nil ||
(*attachment.Header && attachment.ID != attachment.Account.HeaderMediaAttachmentID) ||
(*attachment.Avatar && attachment.ID != attachment.Account.AvatarMediaAttachmentID) {
if err := f(ctx, attachment); err != nil {
return totalPruned, err
}
totalPruned++
}
}
}
// Make sure we don't have a real error when we leave the loop.
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return totalPruned, err
}
return totalPruned, nil
}
func (m *manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) {
// keys in storage will look like the following:
// `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[MEDIA_ID].[EXTENSION]`
// We can filter out keys we're not interested in by
// matching through a regex.
var matchCount int
match := func(storageKey string) bool {
if regexes.FilePath.MatchString(storageKey) {
matchCount++
return true
}
return false
}
iterator, err := m.storage.Iterator(ctx, match) // make sure this iterator is always released
if err != nil {
return 0, fmt.Errorf("PruneOrphaned: error getting storage iterator: %w", err)
}
// Ensure we have some keys, and also advance
// the iterator to the first non-empty key.
if !iterator.Next() {
iterator.Release()
return 0, nil // nothing else to do here
}
// Emojis are stored under the instance account,
// so we need the ID of the instance account for
// the next part.
instanceAccount, err := m.db.GetInstanceAccount(ctx, "")
if err != nil {
iterator.Release()
return 0, fmt.Errorf("PruneOrphaned: error getting instance account: %w", err)
}
instanceAccountID := instanceAccount.ID
// For each key in the iterator, check if entry is orphaned.
orphanedKeys := make([]string, 0, matchCount)
for key := iterator.Key(); iterator.Next(); key = iterator.Key() {
orphaned, err := m.orphaned(ctx, key, instanceAccountID)
if err != nil {
iterator.Release()
return 0, fmt.Errorf("PruneOrphaned: checking orphaned status: %w", err)
}
if orphaned {
orphanedKeys = append(orphanedKeys, key)
}
}
iterator.Release()
totalPruned := len(orphanedKeys)
if dry {
// Dry run: don't remove anything.
return totalPruned, nil
}
// This is not a drill!
// We have to delete stuff!
return totalPruned, m.removeFiles(ctx, orphanedKeys...)
}
func (m *manager) orphaned(ctx context.Context, key string, instanceAccountID string) (bool, error) {
pathParts := regexes.FilePath.FindStringSubmatch(key)
if len(pathParts) != 6 {
// This doesn't match our expectations so
// it wasn't created by gts; ignore it.
return false, nil
}
var (
mediaType = pathParts[2]
mediaID = pathParts[4]
orphaned = false
)
// Look for keys in storage that we don't have an attachment for.
switch Type(mediaType) {
case TypeAttachment, TypeHeader, TypeAvatar:
if _, err := m.db.GetAttachmentByID(ctx, mediaID); err != nil {
if !errors.Is(err, db.ErrNoEntries) {
return false, fmt.Errorf("error calling GetAttachmentByID: %w", err)
}
orphaned = true
}
case TypeEmoji:
// Look using the static URL for the emoji. Emoji images can change, so
// the MEDIA_ID part of the key for emojis will not necessarily correspond
// to the file that's currently being used as the emoji image.
staticURL := uris.GenerateURIForAttachment(instanceAccountID, string(TypeEmoji), string(SizeStatic), mediaID, mimePng)
if _, err := m.db.GetEmojiByStaticURL(ctx, staticURL); err != nil {
if !errors.Is(err, db.ErrNoEntries) {
return false, fmt.Errorf("error calling GetEmojiByStaticURL: %w", err)
}
orphaned = true
}
}
return orphaned, nil
}
func (m *manager) UncacheRemote(ctx context.Context, olderThanDays int, dry bool) (int, error) {
if olderThanDays < 0 {
return 0, nil
}
olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(olderThanDays))
if dry {
// Dry run, just count eligible entries without removing them.
return m.db.CountRemoteOlderThan(ctx, olderThan)
}
var (
totalPruned int
attachments []*gtsmodel.MediaAttachment
err error
)
for attachments, err = m.db.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.db.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit) {
olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value
for _, attachment := range attachments {
if err := m.uncacheAttachment(ctx, attachment); err != nil {
return totalPruned, err
}
totalPruned++
}
}
// Make sure we don't have a real error when we leave the loop.
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return totalPruned, err
}
return totalPruned, nil
}
func (m *manager) PruneUnusedLocal(ctx context.Context, dry bool) (int, error) {
olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(unusedLocalAttachmentDays))
if dry {
// Dry run, just count eligible entries without removing them.
return m.db.CountLocalUnattachedOlderThan(ctx, olderThan)
}
var (
totalPruned int
attachments []*gtsmodel.MediaAttachment
err error
)
for attachments, err = m.db.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.db.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit) {
olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value
for _, attachment := range attachments {
if err := m.deleteAttachment(ctx, attachment); err != nil {
return totalPruned, err
}
totalPruned++
}
}
// Make sure we don't have a real error when we leave the loop.
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return totalPruned, err
}
return totalPruned, nil
}
/*
Handy little helpers
*/
func (m *manager) deleteAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
if err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil {
return err
}
// Delete attachment completely.
return m.db.DeleteByID(ctx, attachment.ID, attachment)
}
func (m *manager) uncacheAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
if err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil {
return err
}
// Update attachment to reflect that we no longer have it cached.
attachment.UpdatedAt = time.Now()
cached := false
attachment.Cached = &cached
return m.db.UpdateByID(ctx, attachment, attachment.ID, "updated_at", "cached")
}
func (m *manager) removeFiles(ctx context.Context, keys ...string) error {
errs := make(gtserror.MultiError, 0, len(keys))
for _, key := range keys {
if err := m.storage.Delete(ctx, key); err != nil && !errors.Is(err, storage.ErrNotFound) {
errs = append(errs, "storage error removing "+key+": "+err.Error())
}
}
return errs.Combine()
}

View File

@ -0,0 +1,358 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media_test
import (
"bytes"
"context"
"io"
"os"
"testing"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/db"
gtsmodel "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
type PruneTestSuite struct {
MediaStandardTestSuite
}
func (suite *PruneTestSuite) TestPruneOrphanedDry() {
// add a big orphan panda to store
b, err := os.ReadFile("./test/big-panda.gif")
if err != nil {
suite.FailNow(err.Error())
}
pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil {
suite.FailNow(err.Error())
}
// dry run should show up 1 orphaned panda
totalPruned, err := suite.manager.PruneOrphaned(context.Background(), true)
suite.NoError(err)
suite.Equal(1, totalPruned)
// panda should still be in storage
hasKey, err := suite.storage.Has(context.Background(), pandaPath)
suite.NoError(err)
suite.True(hasKey)
}
func (suite *PruneTestSuite) TestPruneOrphanedMoist() {
// add a big orphan panda to store
b, err := os.ReadFile("./test/big-panda.gif")
if err != nil {
suite.FailNow(err.Error())
}
pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil {
suite.FailNow(err.Error())
}
// should show up 1 orphaned panda
totalPruned, err := suite.manager.PruneOrphaned(context.Background(), false)
suite.NoError(err)
suite.Equal(1, totalPruned)
// panda should no longer be in storage
hasKey, err := suite.storage.Has(context.Background(), pandaPath)
suite.NoError(err)
suite.False(hasKey)
}
func (suite *PruneTestSuite) TestPruneUnusedLocal() {
testAttachment := suite.testAttachments["local_account_1_unattached_1"]
suite.True(*testAttachment.Cached)
totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false)
suite.NoError(err)
suite.Equal(1, totalPruned)
_, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID)
suite.ErrorIs(err, db.ErrNoEntries)
}
func (suite *PruneTestSuite) TestPruneUnusedLocalDry() {
testAttachment := suite.testAttachments["local_account_1_unattached_1"]
suite.True(*testAttachment.Cached)
totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), true)
suite.NoError(err)
suite.Equal(1, totalPruned)
_, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID)
suite.NoError(err)
}
func (suite *PruneTestSuite) TestPruneRemoteTwice() {
totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false)
suite.NoError(err)
suite.Equal(1, totalPruned)
// final prune should prune nothing, since the first prune already happened
totalPrunedAgain, err := suite.manager.PruneUnusedLocal(context.Background(), false)
suite.NoError(err)
suite.Equal(0, totalPrunedAgain)
}
func (suite *PruneTestSuite) TestPruneOneNonExistent() {
ctx := context.Background()
testAttachment := suite.testAttachments["local_account_1_unattached_1"]
// Delete this attachment cached on disk
media, err := suite.db.GetAttachmentByID(ctx, testAttachment.ID)
suite.NoError(err)
suite.True(*media.Cached)
err = suite.storage.Delete(ctx, media.File.Path)
suite.NoError(err)
// Now attempt to prune for item with db entry no file
totalPruned, err := suite.manager.PruneUnusedLocal(ctx, false)
suite.NoError(err)
suite.Equal(1, totalPruned)
}
func (suite *PruneTestSuite) TestPruneUnusedRemote() {
ctx := context.Background()
// start by clearing zork's avatar + header
zorkOldAvatar := suite.testAttachments["local_account_1_avatar"]
zorkOldHeader := suite.testAttachments["local_account_1_avatar"]
zork := suite.testAccounts["local_account_1"]
zork.AvatarMediaAttachmentID = ""
zork.HeaderMediaAttachmentID = ""
if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil {
panic(err)
}
totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false)
suite.NoError(err)
suite.Equal(2, totalPruned)
// media should no longer be stored
_, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldHeader.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
// attachments should no longer be in the db
_, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID)
suite.ErrorIs(err, db.ErrNoEntries)
_, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID)
suite.ErrorIs(err, db.ErrNoEntries)
}
func (suite *PruneTestSuite) TestPruneUnusedRemoteTwice() {
ctx := context.Background()
// start by clearing zork's avatar + header
zork := suite.testAccounts["local_account_1"]
zork.AvatarMediaAttachmentID = ""
zork.HeaderMediaAttachmentID = ""
if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil {
panic(err)
}
totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false)
suite.NoError(err)
suite.Equal(2, totalPruned)
// final prune should prune nothing, since the first prune already happened
totalPruned, err = suite.manager.PruneUnusedRemote(ctx, false)
suite.NoError(err)
suite.Equal(0, totalPruned)
}
func (suite *PruneTestSuite) TestPruneUnusedRemoteMultipleAccounts() {
ctx := context.Background()
// start by clearing zork's avatar + header
zorkOldAvatar := suite.testAttachments["local_account_1_avatar"]
zorkOldHeader := suite.testAttachments["local_account_1_avatar"]
zork := suite.testAccounts["local_account_1"]
zork.AvatarMediaAttachmentID = ""
zork.HeaderMediaAttachmentID = ""
if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil {
panic(err)
}
// set zork's unused header as belonging to turtle
turtle := suite.testAccounts["local_account_1"]
zorkOldHeader.AccountID = turtle.ID
if err := suite.db.UpdateByID(ctx, zorkOldHeader, zorkOldHeader.ID, "account_id"); err != nil {
panic(err)
}
totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false)
suite.NoError(err)
suite.Equal(2, totalPruned)
// media should no longer be stored
_, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldHeader.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
// attachments should no longer be in the db
_, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID)
suite.ErrorIs(err, db.ErrNoEntries)
_, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID)
suite.ErrorIs(err, db.ErrNoEntries)
}
func (suite *PruneTestSuite) TestUncacheRemote() {
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
suite.True(*testStatusAttachment.Cached)
testHeader := suite.testAttachments["remote_account_3_header"]
suite.True(*testHeader.Cached)
totalUncached, err := suite.manager.UncacheRemote(context.Background(), 1, false)
suite.NoError(err)
suite.Equal(2, totalUncached)
uncachedAttachment, err := suite.db.GetAttachmentByID(context.Background(), testStatusAttachment.ID)
suite.NoError(err)
suite.False(*uncachedAttachment.Cached)
uncachedAttachment, err = suite.db.GetAttachmentByID(context.Background(), testHeader.ID)
suite.NoError(err)
suite.False(*uncachedAttachment.Cached)
}
func (suite *PruneTestSuite) TestUncacheRemoteDry() {
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
suite.True(*testStatusAttachment.Cached)
testHeader := suite.testAttachments["remote_account_3_header"]
suite.True(*testHeader.Cached)
totalUncached, err := suite.manager.UncacheRemote(context.Background(), 1, true)
suite.NoError(err)
suite.Equal(2, totalUncached)
uncachedAttachment, err := suite.db.GetAttachmentByID(context.Background(), testStatusAttachment.ID)
suite.NoError(err)
suite.True(*uncachedAttachment.Cached)
uncachedAttachment, err = suite.db.GetAttachmentByID(context.Background(), testHeader.ID)
suite.NoError(err)
suite.True(*uncachedAttachment.Cached)
}
func (suite *PruneTestSuite) TestUncacheRemoteTwice() {
totalUncached, err := suite.manager.UncacheRemote(context.Background(), 1, false)
suite.NoError(err)
suite.Equal(2, totalUncached)
// final uncache should uncache nothing, since the first uncache already happened
totalUncachedAgain, err := suite.manager.UncacheRemote(context.Background(), 1, false)
suite.NoError(err)
suite.Equal(0, totalUncachedAgain)
}
func (suite *PruneTestSuite) TestUncacheAndRecache() {
ctx := context.Background()
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
testHeader := suite.testAttachments["remote_account_3_header"]
totalUncached, err := suite.manager.UncacheRemote(ctx, 1, false)
suite.NoError(err)
suite.Equal(2, totalUncached)
// media should no longer be stored
_, err = suite.storage.Get(ctx, testStatusAttachment.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testStatusAttachment.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testHeader.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testHeader.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
// now recache the image....
data := func(_ context.Context) (io.ReadCloser, int64, error) {
// load bytes from a test image
b, err := os.ReadFile("../../testrig/media/thoughtsofdog-original.jpg")
if err != nil {
panic(err)
}
return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil
}
for _, original := range []*gtsmodel.MediaAttachment{
testStatusAttachment,
testHeader,
} {
processingRecache, err := suite.manager.RecacheMedia(ctx, data, nil, original.ID)
suite.NoError(err)
// synchronously load the recached attachment
recachedAttachment, err := processingRecache.LoadAttachment(ctx)
suite.NoError(err)
suite.NotNil(recachedAttachment)
// recachedAttachment should be basically the same as the old attachment
suite.True(*recachedAttachment.Cached)
suite.Equal(original.ID, recachedAttachment.ID)
suite.Equal(original.File.Path, recachedAttachment.File.Path) // file should be stored in the same place
suite.Equal(original.Thumbnail.Path, recachedAttachment.Thumbnail.Path) // as should the thumbnail
suite.EqualValues(original.FileMeta, recachedAttachment.FileMeta) // and the filemeta should be the same
// recached files should be back in storage
_, err = suite.storage.Get(ctx, recachedAttachment.File.Path)
suite.NoError(err)
_, err = suite.storage.Get(ctx, recachedAttachment.Thumbnail.Path)
suite.NoError(err)
}
}
func (suite *PruneTestSuite) TestUncacheOneNonExistent() {
ctx := context.Background()
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
// Delete this attachment cached on disk
media, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID)
suite.NoError(err)
suite.True(*media.Cached)
err = suite.storage.Delete(ctx, media.File.Path)
suite.NoError(err)
// Now attempt to uncache remote for item with db entry no file
totalUncached, err := suite.manager.UncacheRemote(ctx, 1, false)
suite.NoError(err)
suite.Equal(2, totalUncached)
}
func TestPruneOrphanedTestSuite(t *testing.T) {
suite.Run(t, &PruneTestSuite{})
}

View File

@ -1,89 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media
import (
"context"
"errors"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
func (m *manager) PruneAllMeta(ctx context.Context) (int, error) {
var (
totalPruned int
maxID string
)
for {
// select "selectPruneLimit" headers / avatars at a time for pruning
attachments, err := m.db.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return totalPruned, err
} else if len(attachments) == 0 {
break
}
// use the id of the last attachment in the slice as the next 'maxID' value
log.Tracef("PruneAllMeta: got %d attachments with maxID < %s", len(attachments), maxID)
maxID = attachments[len(attachments)-1].ID
// prune each attachment that meets one of the following criteria:
// - has no owning account in the database
// - is a header but isn't the owning account's current header
// - is an avatar but isn't the owning account's current avatar
for _, attachment := range attachments {
if attachment.Account == nil ||
(*attachment.Header && attachment.ID != attachment.Account.HeaderMediaAttachmentID) ||
(*attachment.Avatar && attachment.ID != attachment.Account.AvatarMediaAttachmentID) {
if err := m.pruneOneAvatarOrHeader(ctx, attachment); err != nil {
return totalPruned, err
}
totalPruned++
}
}
}
log.Infof("PruneAllMeta: finished pruning avatars + headers: pruned %d entries", totalPruned)
return totalPruned, nil
}
func (m *manager) pruneOneAvatarOrHeader(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
if attachment.File.Path != "" {
// delete the full size attachment from storage
log.Tracef("pruneOneAvatarOrHeader: deleting %s", attachment.File.Path)
if err := m.storage.Delete(ctx, attachment.File.Path); err != nil && err != storage.ErrNotFound {
return err
}
}
if attachment.Thumbnail.Path != "" {
// delete the thumbnail from storage
log.Tracef("pruneOneAvatarOrHeader: deleting %s", attachment.Thumbnail.Path)
if err := m.storage.Delete(ctx, attachment.Thumbnail.Path); err != nil && err != storage.ErrNotFound {
return err
}
}
// delete the attachment entry completely
return m.db.DeleteByID(ctx, attachment.ID, &gtsmodel.MediaAttachment{})
}

View File

@ -1,132 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media_test
import (
"context"
"testing"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/db"
)
type PruneMetaTestSuite struct {
MediaStandardTestSuite
}
func (suite *PruneMetaTestSuite) TestPruneMeta() {
ctx := context.Background()
// start by clearing zork's avatar + header
zorkOldAvatar := suite.testAttachments["local_account_1_avatar"]
zorkOldHeader := suite.testAttachments["local_account_1_avatar"]
zork := suite.testAccounts["local_account_1"]
zork.AvatarMediaAttachmentID = ""
zork.HeaderMediaAttachmentID = ""
if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil {
panic(err)
}
totalPruned, err := suite.manager.PruneAllMeta(ctx)
suite.NoError(err)
suite.Equal(2, totalPruned)
// media should no longer be stored
_, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldHeader.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
// attachments should no longer be in the db
_, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID)
suite.ErrorIs(err, db.ErrNoEntries)
_, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID)
suite.ErrorIs(err, db.ErrNoEntries)
}
func (suite *PruneMetaTestSuite) TestPruneMetaTwice() {
ctx := context.Background()
// start by clearing zork's avatar + header
zork := suite.testAccounts["local_account_1"]
zork.AvatarMediaAttachmentID = ""
zork.HeaderMediaAttachmentID = ""
if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil {
panic(err)
}
totalPruned, err := suite.manager.PruneAllMeta(ctx)
suite.NoError(err)
suite.Equal(2, totalPruned)
// final prune should prune nothing, since the first prune already happened
totalPruned, err = suite.manager.PruneAllMeta(ctx)
suite.NoError(err)
suite.Equal(0, totalPruned)
}
func (suite *PruneMetaTestSuite) TestPruneMetaMultipleAccounts() {
ctx := context.Background()
// start by clearing zork's avatar + header
zorkOldAvatar := suite.testAttachments["local_account_1_avatar"]
zorkOldHeader := suite.testAttachments["local_account_1_avatar"]
zork := suite.testAccounts["local_account_1"]
zork.AvatarMediaAttachmentID = ""
zork.HeaderMediaAttachmentID = ""
if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil {
panic(err)
}
// set zork's unused header as belonging to turtle
turtle := suite.testAccounts["local_account_1"]
zorkOldHeader.AccountID = turtle.ID
if err := suite.db.UpdateByID(ctx, zorkOldHeader, zorkOldHeader.ID, "account_id"); err != nil {
panic(err)
}
totalPruned, err := suite.manager.PruneAllMeta(ctx)
suite.NoError(err)
suite.Equal(2, totalPruned)
// media should no longer be stored
_, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldHeader.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
// attachments should no longer be in the db
_, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID)
suite.ErrorIs(err, db.ErrNoEntries)
_, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID)
suite.ErrorIs(err, db.ErrNoEntries)
}
func TestPruneMetaTestSuite(t *testing.T) {
suite.Run(t, &PruneMetaTestSuite{})
}

View File

@ -1,138 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media
import (
"context"
"errors"
"fmt"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/uris"
)
func (m *manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) {
var totalPruned int
// keys in storage will look like the following:
// `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[MEDIA_ID].[EXTENSION]`
// we can filter out keys we're not interested in by
// matching through a regex
var matchCount int
match := func(storageKey string) bool {
if regexes.FilePath.MatchString(storageKey) {
matchCount++
return true
}
return false
}
log.Info("checking storage keys for orphaned pruning candidates...")
iterator, err := m.storage.Iterator(ctx, match)
if err != nil {
return 0, fmt.Errorf("PruneOrphaned: error getting storage iterator: %w", err)
}
// make sure we have some keys, and also advance
// the iterator to the first non-empty key
if !iterator.Next() {
return 0, nil
}
instanceAccount, err := m.db.GetInstanceAccount(ctx, "")
if err != nil {
return 0, fmt.Errorf("PruneOrphaned: error getting instance account: %w", err)
}
instanceAccountID := instanceAccount.ID
// for each key in the iterator, check if entry is orphaned
log.Info("got %d orphaned pruning candidates, checking for orphaned status, please wait...")
var checkedKeys int
orphanedKeys := make([]string, 0, matchCount)
for key := iterator.Key(); iterator.Next(); key = iterator.Key() {
if m.orphaned(ctx, key, instanceAccountID) {
orphanedKeys = append(orphanedKeys, key)
}
checkedKeys++
if checkedKeys%50 == 0 {
log.Infof("checked %d of %d orphaned pruning candidates...", checkedKeys, matchCount)
}
}
iterator.Release()
if !dry {
// the real deal, we have to delete stuff
for _, key := range orphanedKeys {
log.Infof("key %s corresponds to orphaned media, will remove it now", key)
if err := m.storage.Delete(ctx, key); err != nil {
log.Errorf("error deleting item with key %s from storage: %s", key, err)
continue
}
totalPruned++
}
} else {
// just a dry run, don't delete anything
for _, key := range orphanedKeys {
log.Infof("DRY RUN: key %s corresponds to orphaned media which would be deleted", key)
totalPruned++
}
}
return totalPruned, nil
}
func (m *manager) orphaned(ctx context.Context, key string, instanceAccountID string) bool {
pathParts := regexes.FilePath.FindStringSubmatch(key)
if len(pathParts) != 6 {
return false
}
mediaType := pathParts[2]
mediaID := pathParts[4]
var orphaned bool
switch Type(mediaType) {
case TypeAttachment, TypeHeader, TypeAvatar:
if _, err := m.db.GetAttachmentByID(ctx, mediaID); err != nil {
if errors.Is(err, db.ErrNoEntries) {
orphaned = true
} else {
log.Errorf("orphaned: error calling GetAttachmentByID: %s", err)
}
}
case TypeEmoji:
// look using the static URL for the emoji, since the MEDIA_ID part of
// the key for emojis will not necessarily correspond to the file that's
// currently being used as the emoji image
staticURI := uris.GenerateURIForAttachment(instanceAccountID, string(TypeEmoji), string(SizeStatic), mediaID, mimePng)
if _, err := m.db.GetEmojiByStaticURL(ctx, staticURI); err != nil {
if errors.Is(err, db.ErrNoEntries) {
orphaned = true
} else {
log.Errorf("orphaned: error calling GetEmojiByID: %s", err)
}
}
default:
orphaned = true
}
return orphaned
}

View File

@ -1,82 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media_test
import (
"bytes"
"context"
"os"
"testing"
"github.com/stretchr/testify/suite"
)
type PruneOrphanedTestSuite struct {
MediaStandardTestSuite
}
func (suite *PruneOrphanedTestSuite) TestPruneOrphanedDry() {
// add a big orphan panda to store
b, err := os.ReadFile("./test/big-panda.gif")
if err != nil {
panic(err)
}
pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachments/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
if _, err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil {
panic(err)
}
// dry run should show up 1 orphaned panda
totalPruned, err := suite.manager.PruneOrphaned(context.Background(), true)
suite.NoError(err)
suite.Equal(1, totalPruned)
// panda should still be in storage
hasKey, err := suite.storage.Has(context.Background(), pandaPath)
suite.NoError(err)
suite.True(hasKey)
}
func (suite *PruneOrphanedTestSuite) TestPruneOrphanedMoist() {
// add a big orphan panda to store
b, err := os.ReadFile("./test/big-panda.gif")
if err != nil {
panic(err)
}
pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachments/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
if _, err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil {
panic(err)
}
// should show up 1 orphaned panda
totalPruned, err := suite.manager.PruneOrphaned(context.Background(), false)
suite.NoError(err)
suite.Equal(1, totalPruned)
// panda should no longer be in storage
hasKey, err := suite.storage.Has(context.Background(), pandaPath)
suite.NoError(err)
suite.False(hasKey)
}
func TestPruneOrphanedTestSuite(t *testing.T) {
suite.Run(t, &PruneOrphanedTestSuite{})
}

View File

@ -1,95 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media
import (
"context"
"errors"
"time"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
func (m *manager) PruneAllRemote(ctx context.Context, olderThanDays int) (int, error) {
var totalPruned int
olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(olderThanDays))
log.Infof("PruneAllRemote: pruning media older than %s", olderThan)
for {
// Select "selectPruneLimit" status attacchments at a time for pruning
attachments, err := m.db.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return totalPruned, err
} else if len(attachments) == 0 {
break
}
// use the age of the oldest attachment (last in slice) as the next 'olderThan' value
log.Tracef("PruneAllRemote: got %d status attachments older than %s", len(attachments), olderThan)
olderThan = attachments[len(attachments)-1].CreatedAt
// prune each status attachment
for _, attachment := range attachments {
if err := m.pruneOneRemote(ctx, attachment); err != nil {
return totalPruned, err
}
totalPruned++
}
}
log.Infof("PruneAllRemote: finished pruning remote media: pruned %d entries", totalPruned)
return totalPruned, nil
}
func (m *manager) pruneOneRemote(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
var changed bool
if attachment.File.Path != "" {
// delete the full size attachment from storage
log.Tracef("pruneOneRemote: deleting %s", attachment.File.Path)
if err := m.storage.Delete(ctx, attachment.File.Path); err != nil && !errors.Is(err, storage.ErrNotFound) {
return err
}
cached := false
attachment.Cached = &cached
changed = true
}
if attachment.Thumbnail.Path != "" {
// delete the thumbnail from storage
log.Tracef("pruneOneRemote: deleting %s", attachment.Thumbnail.Path)
if err := m.storage.Delete(ctx, attachment.Thumbnail.Path); err != nil && !errors.Is(err, storage.ErrNotFound) {
return err
}
cached := false
attachment.Cached = &cached
changed = true
}
if !changed {
return nil
}
// update the attachment to reflect that we no longer have it cached
return m.db.UpdateByID(ctx, attachment, attachment.ID, "updated_at", "cached")
}

View File

@ -1,143 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media_test
import (
"bytes"
"context"
"io"
"os"
"testing"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
type PruneRemoteTestSuite struct {
MediaStandardTestSuite
}
func (suite *PruneRemoteTestSuite) TestPruneRemote() {
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
suite.True(*testStatusAttachment.Cached)
testHeader := suite.testAttachments["remote_account_3_header"]
suite.True(*testHeader.Cached)
totalPruned, err := suite.manager.PruneAllRemote(context.Background(), 1)
suite.NoError(err)
suite.Equal(2, totalPruned)
prunedAttachment, err := suite.db.GetAttachmentByID(context.Background(), testStatusAttachment.ID)
suite.NoError(err)
suite.False(*prunedAttachment.Cached)
prunedAttachment, err = suite.db.GetAttachmentByID(context.Background(), testHeader.ID)
suite.NoError(err)
suite.False(*prunedAttachment.Cached)
}
func (suite *PruneRemoteTestSuite) TestPruneRemoteTwice() {
totalPruned, err := suite.manager.PruneAllRemote(context.Background(), 1)
suite.NoError(err)
suite.Equal(2, totalPruned)
// final prune should prune nothing, since the first prune already happened
totalPrunedAgain, err := suite.manager.PruneAllRemote(context.Background(), 1)
suite.NoError(err)
suite.Equal(0, totalPrunedAgain)
}
func (suite *PruneRemoteTestSuite) TestPruneAndRecache() {
ctx := context.Background()
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
testHeader := suite.testAttachments["remote_account_3_header"]
totalPruned, err := suite.manager.PruneAllRemote(ctx, 1)
suite.NoError(err)
suite.Equal(2, totalPruned)
// media should no longer be stored
_, err = suite.storage.Get(ctx, testStatusAttachment.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testStatusAttachment.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testHeader.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testHeader.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
// now recache the image....
data := func(_ context.Context) (io.ReadCloser, int64, error) {
// load bytes from a test image
b, err := os.ReadFile("../../testrig/media/thoughtsofdog-original.jpg")
if err != nil {
panic(err)
}
return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil
}
for _, original := range []*gtsmodel.MediaAttachment{
testStatusAttachment,
testHeader,
} {
processingRecache, err := suite.manager.RecacheMedia(ctx, data, nil, original.ID)
suite.NoError(err)
// synchronously load the recached attachment
recachedAttachment, err := processingRecache.LoadAttachment(ctx)
suite.NoError(err)
suite.NotNil(recachedAttachment)
// recachedAttachment should be basically the same as the old attachment
suite.True(*recachedAttachment.Cached)
suite.Equal(original.ID, recachedAttachment.ID)
suite.Equal(original.File.Path, recachedAttachment.File.Path) // file should be stored in the same place
suite.Equal(original.Thumbnail.Path, recachedAttachment.Thumbnail.Path) // as should the thumbnail
suite.EqualValues(original.FileMeta, recachedAttachment.FileMeta) // and the filemeta should be the same
// recached files should be back in storage
_, err = suite.storage.Get(ctx, recachedAttachment.File.Path)
suite.NoError(err)
_, err = suite.storage.Get(ctx, recachedAttachment.Thumbnail.Path)
suite.NoError(err)
}
}
func (suite *PruneRemoteTestSuite) TestPruneOneNonExistent() {
ctx := context.Background()
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
// Delete this attachment cached on disk
media, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID)
suite.NoError(err)
suite.True(*media.Cached)
err = suite.storage.Delete(ctx, media.File.Path)
suite.NoError(err)
// Now attempt to prune remote for item with db entry no file
totalPruned, err := suite.manager.PruneAllRemote(ctx, 1)
suite.NoError(err)
suite.Equal(2, totalPruned)
}
func TestPruneRemoteTestSuite(t *testing.T) {
suite.Run(t, &PruneRemoteTestSuite{})
}

View File

@ -1,83 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media
import (
"context"
"time"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
func (m *manager) PruneUnusedLocalAttachments(ctx context.Context) (int, error) {
var totalPruned int
var maxID string
var attachments []*gtsmodel.MediaAttachment
var err error
olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(UnusedLocalAttachmentCacheDays))
log.Infof("PruneUnusedLocalAttachments: pruning unused local attachments older than %s", olderThan)
// select 20 attachments at a time and prune them
for attachments, err = m.db.GetLocalUnattachedOlderThan(ctx, olderThan, maxID, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.db.GetLocalUnattachedOlderThan(ctx, olderThan, maxID, selectPruneLimit) {
// use the id of the last attachment in the slice as the next 'maxID' value
l := len(attachments)
maxID = attachments[l-1].ID
log.Tracef("PruneUnusedLocalAttachments: got %d unused local attachments older than %s with maxID < %s", l, olderThan, maxID)
for _, attachment := range attachments {
if err := m.pruneOneLocal(ctx, attachment); err != nil {
return totalPruned, err
}
totalPruned++
}
}
// make sure we don't have a real error when we leave the loop
if err != nil && err != db.ErrNoEntries {
return totalPruned, err
}
log.Infof("PruneUnusedLocalAttachments: finished pruning: pruned %d entries", totalPruned)
return totalPruned, nil
}
func (m *manager) pruneOneLocal(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
if attachment.File.Path != "" {
// delete the full size attachment from storage
log.Tracef("pruneOneLocal: deleting %s", attachment.File.Path)
if err := m.storage.Delete(ctx, attachment.File.Path); err != nil && err != storage.ErrNotFound {
return err
}
}
if attachment.Thumbnail.Path != "" {
// delete the thumbnail from storage
log.Tracef("pruneOneLocal: deleting %s", attachment.Thumbnail.Path)
if err := m.storage.Delete(ctx, attachment.Thumbnail.Path); err != nil && err != storage.ErrNotFound {
return err
}
}
// delete the attachment completely
return m.db.DeleteByID(ctx, attachment.ID, attachment)
}

View File

@ -1,75 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media_test
import (
"context"
"testing"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/db"
)
type PruneUnusedLocalTestSuite struct {
MediaStandardTestSuite
}
func (suite *PruneUnusedLocalTestSuite) TestPruneUnusedLocal() {
testAttachment := suite.testAttachments["local_account_1_unattached_1"]
suite.True(*testAttachment.Cached)
totalPruned, err := suite.manager.PruneUnusedLocalAttachments(context.Background())
suite.NoError(err)
suite.Equal(1, totalPruned)
_, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID)
suite.ErrorIs(err, db.ErrNoEntries)
}
func (suite *PruneUnusedLocalTestSuite) TestPruneRemoteTwice() {
totalPruned, err := suite.manager.PruneUnusedLocalAttachments(context.Background())
suite.NoError(err)
suite.Equal(1, totalPruned)
// final prune should prune nothing, since the first prune already happened
totalPrunedAgain, err := suite.manager.PruneUnusedLocalAttachments(context.Background())
suite.NoError(err)
suite.Equal(0, totalPrunedAgain)
}
func (suite *PruneUnusedLocalTestSuite) TestPruneOneNonExistent() {
ctx := context.Background()
testAttachment := suite.testAttachments["local_account_1_unattached_1"]
// Delete this attachment cached on disk
media, err := suite.db.GetAttachmentByID(ctx, testAttachment.ID)
suite.NoError(err)
suite.True(*media.Cached)
err = suite.storage.Delete(ctx, media.File.Path)
suite.NoError(err)
// Now attempt to prune for item with db entry no file
totalPruned, err := suite.manager.PruneUnusedLocalAttachments(ctx)
suite.NoError(err)
suite.Equal(1, totalPruned)
}
func TestPruneUnusedLocalTestSuite(t *testing.T) {
suite.Run(t, &PruneUnusedLocalTestSuite{})
}

View File

@ -1,79 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media
import (
"fmt"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
var SupportedMIMETypes = []string{
mimeImageJpeg,
mimeImageGif,
mimeImagePng,
mimeImageWebp,
mimeVideoMp4,
}
var SupportedEmojiMIMETypes = []string{
mimeImageGif,
mimeImagePng,
}
// ParseMediaType converts s to a recognized MediaType, or returns an error if unrecognized
func ParseMediaType(s string) (Type, error) {
switch s {
case string(TypeAttachment):
return TypeAttachment, nil
case string(TypeHeader):
return TypeHeader, nil
case string(TypeAvatar):
return TypeAvatar, nil
case string(TypeEmoji):
return TypeEmoji, nil
}
return "", fmt.Errorf("%s not a recognized MediaType", s)
}
// ParseMediaSize converts s to a recognized MediaSize, or returns an error if unrecognized
func ParseMediaSize(s string) (Size, error) {
switch s {
case string(SizeSmall):
return SizeSmall, nil
case string(SizeOriginal):
return SizeOriginal, nil
case string(SizeStatic):
return SizeStatic, nil
}
return "", fmt.Errorf("%s not a recognized MediaSize", s)
}
// logrusWrapper is just a util for passing the logrus logger into the cron logging system.
type logrusWrapper struct{}
// Info logs routine messages about cron's operation.
func (l *logrusWrapper) Info(msg string, keysAndValues ...interface{}) {
log.Info("media manager cron logger: ", msg, keysAndValues)
}
// Error logs an error condition.
func (l *logrusWrapper) Error(err error, msg string, keysAndValues ...interface{}) {
log.Error("media manager cron logger: ", err, msg, keysAndValues)
}

View File

@ -23,7 +23,6 @@ import (
"fmt"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
func (p *processor) MediaPrune(ctx context.Context, mediaRemoteCacheDays int) gtserror.WithCode {
@ -32,32 +31,10 @@ func (p *processor) MediaPrune(ctx context.Context, mediaRemoteCacheDays int) gt
return gtserror.NewErrorBadRequest(err, err.Error())
}
go func() {
pruned, err := p.mediaManager.PruneAllRemote(context.Background(), mediaRemoteCacheDays)
if err != nil {
log.Errorf("MediaPrune: error pruning remote cache: %s", err)
} else {
log.Infof("MediaPrune: pruned %d remote cache entries", pruned)
}
}()
go func() {
pruned, err := p.mediaManager.PruneUnusedLocalAttachments(context.Background())
if err != nil {
log.Errorf("MediaPrune: error pruning unused local cache: %s", err)
} else {
log.Infof("MediaPrune: pruned %d unused local cache entries", pruned)
}
}()
go func() {
pruned, err := p.mediaManager.PruneAllMeta(context.Background())
if err != nil {
log.Errorf("MediaPrune: error pruning meta: %s", err)
} else {
log.Infof("MediaPrune: pruned %d meta entries", pruned)
}
}()
if err := p.mediaManager.PruneAll(ctx, mediaRemoteCacheDays, false); err != nil {
err = fmt.Errorf("MediaPrune: %w", err)
return gtserror.NewErrorInternalError(err)
}
return nil
}

View File

@ -33,14 +33,42 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/uris"
)
// ParseMediaType converts s to a recognized MediaType, or returns an error if unrecognized
func parseMediaType(s string) (media.Type, error) {
switch s {
case string(media.TypeAttachment):
return media.TypeAttachment, nil
case string(media.TypeHeader):
return media.TypeHeader, nil
case string(media.TypeAvatar):
return media.TypeAvatar, nil
case string(media.TypeEmoji):
return media.TypeEmoji, nil
}
return "", fmt.Errorf("%s not a recognized media.Type", s)
}
// ParseMediaSize converts s to a recognized MediaSize, or returns an error if unrecognized
func parseMediaSize(s string) (media.Size, error) {
switch s {
case string(media.SizeSmall):
return media.SizeSmall, nil
case string(media.SizeOriginal):
return media.SizeOriginal, nil
case string(media.SizeStatic):
return media.SizeStatic, nil
}
return "", fmt.Errorf("%s not a recognized media.Size", s)
}
func (p *processor) GetFile(ctx context.Context, requestingAccount *gtsmodel.Account, form *apimodel.GetContentRequestForm) (*apimodel.Content, gtserror.WithCode) {
// parse the form fields
mediaSize, err := media.ParseMediaSize(form.MediaSize)
mediaSize, err := parseMediaSize(form.MediaSize)
if err != nil {
return nil, gtserror.NewErrorNotFound(fmt.Errorf("media size %s not valid", form.MediaSize))
}
mediaType, err := media.ParseMediaType(form.MediaType)
mediaType, err := parseMediaType(form.MediaType)
if err != nil {
return nil, gtserror.NewErrorNotFound(fmt.Errorf("media type %s not valid", form.MediaType))
}

View File

@ -2,7 +2,7 @@
set -eu
EXPECT='{"account-domain":"peepee","accounts-allow-custom-css":true,"accounts-approval-required":false,"accounts-reason-required":false,"accounts-registration-open":true,"advanced-cookies-samesite":"strict","advanced-rate-limit-requests":6969,"advanced-throttling-multiplier":-1,"advanced-throttling-retry-after":10000000000,"application-name":"gts","bind-address":"127.0.0.1","cache":{"gts":{"account-max-size":99,"account-sweep-freq":1000000000,"account-ttl":10800000000000,"block-max-size":100,"block-sweep-freq":10000000000,"block-ttl":300000000000,"domain-block-max-size":1000,"domain-block-sweep-freq":60000000000,"domain-block-ttl":86400000000000,"emoji-category-max-size":100,"emoji-category-sweep-freq":10000000000,"emoji-category-ttl":300000000000,"emoji-max-size":500,"emoji-sweep-freq":10000000000,"emoji-ttl":300000000000,"mention-max-size":500,"mention-sweep-freq":10000000000,"mention-ttl":300000000000,"notification-max-size":500,"notification-sweep-freq":10000000000,"notification-ttl":300000000000,"report-max-size":100,"report-sweep-freq":10000000000,"report-ttl":300000000000,"status-max-size":500,"status-sweep-freq":10000000000,"status-ttl":300000000000,"tombstone-max-size":100,"tombstone-sweep-freq":10000000000,"tombstone-ttl":300000000000,"user-max-size":100,"user-sweep-freq":10000000000,"user-ttl":300000000000}},"config-path":"internal/config/testdata/test.yaml","db-address":":memory:","db-database":"gotosocial_prod","db-max-open-conns-multiplier":3,"db-password":"hunter2","db-port":6969,"db-sqlite-busy-timeout":1000000000,"db-sqlite-cache-size":0,"db-sqlite-journal-mode":"DELETE","db-sqlite-synchronous":"FULL","db-tls-ca-cert":"","db-tls-mode":"disable","db-type":"sqlite","db-user":"sex-haver","dry-run":false,"email":"","host":"example.com","instance-deliver-to-shared-inboxes":false,"instance-expose-peers":true,"instance-expose-public-timeline":true,"instance-expose-suspended":true,"instance-expose-suspended-web":true,"landing-page-user":"admin","letsencrypt-cert-dir":"/gotosocial/storage/certs","letsencrypt-email-address":"","letsencrypt-enabled":true,"letsencrypt-port":80,"log-db-queries":true,"log-level":"info","media-description-max-chars":5000,"media-description-min-chars":69,"media-emoji-local-max-size":420,"media-emoji-remote-max-size":420,"media-image-max-size":420,"media-remote-cache-days":30,"media-video-max-size":420,"oidc-client-id":"1234","oidc-client-secret":"shhhh its a secret","oidc-enabled":true,"oidc-idp-name":"sex-haver","oidc-issuer":"whoknows","oidc-link-existing":true,"oidc-scopes":["read","write"],"oidc-skip-verification":true,"password":"","path":"","port":6969,"protocol":"http","smtp-from":"queen.rip.in.piss@terfisland.org","smtp-host":"example.com","smtp-password":"hunter2","smtp-port":4269,"smtp-username":"sex-haver","software-version":"","statuses-cw-max-chars":420,"statuses-max-chars":69,"statuses-media-max-files":1,"statuses-poll-max-options":1,"statuses-poll-option-max-chars":50,"storage-backend":"local","storage-local-base-path":"/root/store","storage-s3-access-key":"minio","storage-s3-bucket":"gts","storage-s3-endpoint":"localhost:9000","storage-s3-proxy":true,"storage-s3-secret-key":"miniostorage","storage-s3-use-ssl":false,"syslog-address":"127.0.0.1:6969","syslog-enabled":true,"syslog-protocol":"udp","trusted-proxies":["127.0.0.1/32","docker.host.local"],"username":"","web-asset-base-dir":"/root","web-template-base-dir":"/root"}'
EXPECT='{"account-domain":"peepee","accounts-allow-custom-css":true,"accounts-approval-required":false,"accounts-reason-required":false,"accounts-registration-open":true,"advanced-cookies-samesite":"strict","advanced-rate-limit-requests":6969,"advanced-throttling-multiplier":-1,"advanced-throttling-retry-after":10000000000,"application-name":"gts","bind-address":"127.0.0.1","cache":{"gts":{"account-max-size":99,"account-sweep-freq":1000000000,"account-ttl":10800000000000,"block-max-size":100,"block-sweep-freq":10000000000,"block-ttl":300000000000,"domain-block-max-size":1000,"domain-block-sweep-freq":60000000000,"domain-block-ttl":86400000000000,"emoji-category-max-size":100,"emoji-category-sweep-freq":10000000000,"emoji-category-ttl":300000000000,"emoji-max-size":500,"emoji-sweep-freq":10000000000,"emoji-ttl":300000000000,"mention-max-size":500,"mention-sweep-freq":10000000000,"mention-ttl":300000000000,"notification-max-size":500,"notification-sweep-freq":10000000000,"notification-ttl":300000000000,"report-max-size":100,"report-sweep-freq":10000000000,"report-ttl":300000000000,"status-max-size":500,"status-sweep-freq":10000000000,"status-ttl":300000000000,"tombstone-max-size":100,"tombstone-sweep-freq":10000000000,"tombstone-ttl":300000000000,"user-max-size":100,"user-sweep-freq":10000000000,"user-ttl":300000000000}},"config-path":"internal/config/testdata/test.yaml","db-address":":memory:","db-database":"gotosocial_prod","db-max-open-conns-multiplier":3,"db-password":"hunter2","db-port":6969,"db-sqlite-busy-timeout":1000000000,"db-sqlite-cache-size":0,"db-sqlite-journal-mode":"DELETE","db-sqlite-synchronous":"FULL","db-tls-ca-cert":"","db-tls-mode":"disable","db-type":"sqlite","db-user":"sex-haver","dry-run":true,"email":"","host":"example.com","instance-deliver-to-shared-inboxes":false,"instance-expose-peers":true,"instance-expose-public-timeline":true,"instance-expose-suspended":true,"instance-expose-suspended-web":true,"landing-page-user":"admin","letsencrypt-cert-dir":"/gotosocial/storage/certs","letsencrypt-email-address":"","letsencrypt-enabled":true,"letsencrypt-port":80,"log-db-queries":true,"log-level":"info","media-description-max-chars":5000,"media-description-min-chars":69,"media-emoji-local-max-size":420,"media-emoji-remote-max-size":420,"media-image-max-size":420,"media-remote-cache-days":30,"media-video-max-size":420,"oidc-client-id":"1234","oidc-client-secret":"shhhh its a secret","oidc-enabled":true,"oidc-idp-name":"sex-haver","oidc-issuer":"whoknows","oidc-link-existing":true,"oidc-scopes":["read","write"],"oidc-skip-verification":true,"password":"","path":"","port":6969,"protocol":"http","smtp-from":"queen.rip.in.piss@terfisland.org","smtp-host":"example.com","smtp-password":"hunter2","smtp-port":4269,"smtp-username":"sex-haver","software-version":"","statuses-cw-max-chars":420,"statuses-max-chars":69,"statuses-media-max-files":1,"statuses-poll-max-options":1,"statuses-poll-option-max-chars":50,"storage-backend":"local","storage-local-base-path":"/root/store","storage-s3-access-key":"minio","storage-s3-bucket":"gts","storage-s3-endpoint":"localhost:9000","storage-s3-proxy":true,"storage-s3-secret-key":"miniostorage","storage-s3-use-ssl":false,"syslog-address":"127.0.0.1:6969","syslog-enabled":true,"syslog-protocol":"udp","trusted-proxies":["127.0.0.1/32","docker.host.local"],"username":"","web-asset-base-dir":"/root","web-template-base-dir":"/root"}'
# Set all the environment variables to
# ensure that these are parsed without panic

View File

@ -34,7 +34,7 @@ func InitTestConfig() {
}
var testDefaults = config.Configuration{
LogLevel: "info",
LogLevel: "trace",
LogDbQueries: true,
ApplicationName: "gotosocial",
LandingPageUser: "",