2021-02-28 15:17:18 +01:00
|
|
|
/*
|
2021-03-01 15:41:43 +01:00
|
|
|
GoToSocial
|
2023-01-05 12:43:00 +01:00
|
|
|
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
|
2021-02-28 15:17:18 +01:00
|
|
|
|
2021-03-01 15:41:43 +01:00
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU Affero General Public License as published by
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
2021-02-28 15:17:18 +01:00
|
|
|
|
2021-03-01 15:41:43 +01:00
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU Affero General Public License for more details.
|
2021-02-28 15:17:18 +01:00
|
|
|
|
2021-03-01 15:41:43 +01:00
|
|
|
You should have received a copy of the GNU Affero General Public License
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2021-02-28 15:17:18 +01:00
|
|
|
*/
|
|
|
|
|
2021-03-09 17:03:40 +01:00
|
|
|
package media
|
|
|
|
|
2021-04-01 20:46:45 +02:00
|
|
|
import (
|
2021-05-17 19:06:58 +02:00
|
|
|
"context"
|
2022-03-07 11:08:26 +01:00
|
|
|
"fmt"
|
2021-04-01 20:46:45 +02:00
|
|
|
|
2022-05-15 11:16:43 +02:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/concurrency"
|
2021-04-01 20:46:45 +02:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/db"
|
2022-07-03 12:08:30 +02:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/storage"
|
2021-04-01 20:46:45 +02:00
|
|
|
)
|
|
|
|
|
2023-02-11 12:48:38 +01:00
|
|
|
var SupportedMIMETypes = []string{
|
|
|
|
mimeImageJpeg,
|
|
|
|
mimeImageGif,
|
|
|
|
mimeImagePng,
|
|
|
|
mimeImageWebp,
|
|
|
|
mimeVideoMp4,
|
|
|
|
}
|
2022-05-15 16:45:04 +02:00
|
|
|
|
2023-02-11 12:48:38 +01:00
|
|
|
var SupportedEmojiMIMETypes = []string{
|
|
|
|
mimeImageGif,
|
|
|
|
mimeImagePng,
|
|
|
|
}
|
2022-06-30 12:22:10 +02:00
|
|
|
|
2021-12-28 16:36:00 +01:00
|
|
|
// Manager provides an interface for managing media: parsing, storing, and retrieving media objects like photos, videos, and gifs.
//
// Its methods fall into three groups: processing (decode + store), pruning/uncaching
// (remove stored data that is no longer needed), and refetching (restore missing data).
type Manager interface {
	// Stop stops the underlying worker pool of the manager. It should be called
	// when closing GoToSocial in order to cleanly finish any in-progress jobs.
	// It will block until workers are finished processing.
	Stop() error

	/*
		PROCESSING FUNCTIONS
	*/

	// ProcessMedia begins the process of decoding and storing the given data as an attachment.
	// It will return a pointer to a ProcessingMedia struct upon which further actions can be performed, such as getting
	// the finished media, thumbnail, attachment, etc.
	//
	// data should be a function that the media manager can call to return a reader containing the media data.
	//
	// postData will be called after data has been called; it can be used to clean up any remaining resources.
	// The provided function can be nil, in which case it will not be executed.
	//
	// accountID should be the account that the media belongs to.
	//
	// ai is optional and can be nil. Any additional information about the attachment provided will be put in the database.
	ProcessMedia(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, accountID string, ai *AdditionalMediaInfo) (*ProcessingMedia, error)
	// ProcessEmoji begins the process of decoding and storing the given data as an emoji.
	// It will return a pointer to a ProcessingEmoji struct upon which further actions can be performed, such as getting
	// the finished emoji.
	//
	// data should be a function that the media manager can call to return a reader containing the emoji data.
	//
	// postData will be called after data has been called; it can be used to clean up any remaining resources.
	// The provided function can be nil, in which case it will not be executed.
	//
	// shortcode should be the emoji shortcode without the ':'s around it.
	//
	// id is the database ID that should be used to store the emoji.
	//
	// uri is the ActivityPub URI/ID of the emoji.
	//
	// ai is optional and can be nil. Any additional information about the emoji provided will be put in the database.
	//
	// If refresh is true, this indicates that the emoji image has changed and should be updated.
	ProcessEmoji(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, shortcode string, id string, uri string, ai *AdditionalEmojiInfo, refresh bool) (*ProcessingEmoji, error)
	// RecacheMedia refetches, reprocesses, and recaches an existing attachment that has been uncached
	// (for example by UncacheRemote).
	//
	// data and postData have the same meaning as for ProcessMedia; attachmentID identifies the
	// existing attachment to recache.
	RecacheMedia(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, attachmentID string) (*ProcessingMedia, error)

	/*
		PRUNING/UNCACHING FUNCTIONS
	*/

	// PruneAll runs all of the below pruning/uncaching functions, and then cleans up any resulting
	// empty directories from the storage driver. It can be called as a shortcut for calling the below
	// pruning functions one by one.
	//
	// mediaCacheRemoteDays is presumably the age threshold, in days, handed to UncacheRemote —
	// TODO confirm against the implementation.
	//
	// If blocking is true, then any errors encountered during the prune will be combined + returned to
	// the caller. If blocking is false, the prune is run in the background and errors are just logged
	// instead.
	PruneAll(ctx context.Context, mediaCacheRemoteDays int, blocking bool) error
	// UncacheRemote uncaches all remote media attachments older than the given amount of days.
	//
	// In this context, uncaching means deleting media files from storage and marking the attachment
	// as cached=false in the database.
	//
	// If 'dry' is true, then only a dry run will be performed: nothing will actually be changed.
	//
	// The returned int is the amount of media that was/would be uncached by this function.
	UncacheRemote(ctx context.Context, olderThanDays int, dry bool) (int, error)
	// PruneUnusedRemote prunes unused/out of date headers and avatars cached on this instance.
	//
	// NOTE(review): dry presumably requests a dry run, as documented on UncacheRemote — confirm.
	//
	// The returned int is the amount of media that was pruned by this function.
	PruneUnusedRemote(ctx context.Context, dry bool) (int, error)
	// PruneUnusedLocal prunes unused media attachments that were uploaded by
	// a user on this instance, but never actually attached to a status, or attached but
	// later detached.
	//
	// NOTE(review): dry presumably requests a dry run, as documented on UncacheRemote — confirm.
	//
	// The returned int is the amount of media that was pruned by this function.
	PruneUnusedLocal(ctx context.Context, dry bool) (int, error)
	// PruneOrphaned prunes files that exist in storage but which do not have a corresponding
	// entry in the database.
	//
	// If dry is true, then nothing will be changed, only the amount that *would* be removed
	// is returned to the caller.
	PruneOrphaned(ctx context.Context, dry bool) (int, error)

	/*
		REFETCHING FUNCTIONS
		Useful when data loss has occurred.
	*/

	// RefetchEmojis iterates through remote emojis (for the given domain, or all if domain is empty string).
	//
	// For each emoji, the manager will check whether both the full size and static images are present in storage.
	// If not, the manager will refetch and reprocess full size and static images for the emoji.
	//
	// The provided DereferenceMedia function will be used when it's necessary to refetch something this way.
	//
	// NOTE(review): the returned int is presumably the number of emojis that were refetched — confirm.
	RefetchEmojis(ctx context.Context, domain string, dereferenceMedia DereferenceMedia) (int, error)
}
|
|
|
|
|
2021-12-28 16:36:00 +01:00
|
|
|
// manager is the concrete media manager built by NewManager, which returns it
// as a Manager.
type manager struct {
	// db is the database handle passed to NewManager.
	db db.DB
	// storage is the storage driver passed to NewManager.
	storage *storage.Driver
	// emojiWorker is the pool whose processor calls LoadEmoji on each queued ProcessingEmoji.
	emojiWorker *concurrency.WorkerPool[*ProcessingEmoji]
	// mediaWorker is the pool whose processor calls LoadAttachment on each queued ProcessingMedia.
	mediaWorker *concurrency.WorkerPool[*ProcessingMedia]
	// stopCronJobs is called by Stop when it is non-nil.
	// NOTE(review): presumably installed by scheduleCleanup — confirm.
	stopCronJobs func() error
}
|
|
|
|
|
2022-01-10 18:36:09 +01:00
|
|
|
// NewManager returns a media manager with the given db and underlying storage.
|
|
|
|
//
|
|
|
|
// A worker pool will also be initialized for the manager, to ensure that only
|
2022-05-07 17:36:01 +02:00
|
|
|
// a limited number of media will be processed in parallel. The numbers of workers
|
|
|
|
// is determined from the $GOMAXPROCS environment variable (usually no. CPU cores).
|
2022-05-15 11:16:43 +02:00
|
|
|
// See internal/concurrency.NewWorkerPool() documentation for further information.
|
2022-11-24 09:35:46 +01:00
|
|
|
func NewManager(database db.DB, storage *storage.Driver) (Manager, error) {
|
2022-01-03 17:37:38 +01:00
|
|
|
m := &manager{
|
2022-05-07 17:36:01 +02:00
|
|
|
db: database,
|
|
|
|
storage: storage,
|
2022-01-10 18:36:09 +01:00
|
|
|
}
|
|
|
|
|
2023-02-11 12:48:38 +01:00
|
|
|
// Prepare the media worker pool.
|
2022-05-15 11:16:43 +02:00
|
|
|
m.mediaWorker = concurrency.NewWorkerPool[*ProcessingMedia](-1, 10)
|
2022-05-07 17:36:01 +02:00
|
|
|
m.mediaWorker.SetProcessor(func(ctx context.Context, media *ProcessingMedia) error {
|
|
|
|
if _, err := media.LoadAttachment(ctx); err != nil {
|
|
|
|
return fmt.Errorf("error loading media %s: %v", media.AttachmentID(), err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
|
2023-02-11 12:48:38 +01:00
|
|
|
// Prepare the emoji worker pool.
|
2022-05-15 11:16:43 +02:00
|
|
|
m.emojiWorker = concurrency.NewWorkerPool[*ProcessingEmoji](-1, 10)
|
2022-05-07 17:36:01 +02:00
|
|
|
m.emojiWorker.SetProcessor(func(ctx context.Context, emoji *ProcessingEmoji) error {
|
|
|
|
if _, err := emoji.LoadEmoji(ctx); err != nil {
|
|
|
|
return fmt.Errorf("error loading emoji %s: %v", emoji.EmojiID(), err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
|
2023-02-11 12:48:38 +01:00
|
|
|
// Start the worker pools.
|
2022-05-07 17:36:01 +02:00
|
|
|
if err := m.mediaWorker.Start(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if err := m.emojiWorker.Start(); err != nil {
|
|
|
|
return nil, err
|
2021-04-01 20:46:45 +02:00
|
|
|
}
|
2022-01-03 17:37:38 +01:00
|
|
|
|
2023-02-11 12:48:38 +01:00
|
|
|
// Schedule cron job(s) for clean up.
|
|
|
|
if err := scheduleCleanup(m); err != nil {
|
2022-05-15 16:45:04 +02:00
|
|
|
return nil, err
|
2022-03-07 11:08:26 +01:00
|
|
|
}
|
|
|
|
|
2022-01-03 17:37:38 +01:00
|
|
|
return m, nil
|
2021-04-01 20:46:45 +02:00
|
|
|
}
|
|
|
|
|
2022-02-22 13:50:33 +01:00
|
|
|
func (m *manager) ProcessMedia(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, accountID string, ai *AdditionalMediaInfo) (*ProcessingMedia, error) {
|
|
|
|
processingMedia, err := m.preProcessMedia(ctx, data, postData, accountID, ai)
|
2021-12-28 16:36:00 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2022-05-07 17:36:01 +02:00
|
|
|
m.mediaWorker.Queue(processingMedia)
|
2022-01-11 17:49:14 +01:00
|
|
|
return processingMedia, nil
|
|
|
|
}
|
2022-01-03 17:37:38 +01:00
|
|
|
|
2022-10-13 15:16:24 +02:00
|
|
|
func (m *manager) ProcessEmoji(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, shortcode string, id string, uri string, ai *AdditionalEmojiInfo, refresh bool) (*ProcessingEmoji, error) {
|
|
|
|
processingEmoji, err := m.preProcessEmoji(ctx, data, postData, shortcode, id, uri, ai, refresh)
|
2022-01-11 17:49:14 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2021-05-21 15:48:26 +02:00
|
|
|
}
|
2022-05-07 17:36:01 +02:00
|
|
|
m.emojiWorker.Queue(processingEmoji)
|
2022-01-11 17:49:14 +01:00
|
|
|
return processingEmoji, nil
|
2022-01-08 17:17:01 +01:00
|
|
|
}
|
|
|
|
|
2022-03-07 11:08:26 +01:00
|
|
|
func (m *manager) RecacheMedia(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, attachmentID string) (*ProcessingMedia, error) {
|
|
|
|
processingRecache, err := m.preProcessRecache(ctx, data, postData, attachmentID)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2022-05-07 17:36:01 +02:00
|
|
|
m.mediaWorker.Queue(processingRecache)
|
2022-03-07 11:08:26 +01:00
|
|
|
return processingRecache, nil
|
|
|
|
}
|
|
|
|
|
2022-05-07 17:36:01 +02:00
|
|
|
func (m *manager) Stop() error {
|
2023-02-11 12:48:38 +01:00
|
|
|
// Stop worker pools.
|
2022-05-07 17:36:01 +02:00
|
|
|
mediaErr := m.mediaWorker.Stop()
|
|
|
|
emojiErr := m.emojiWorker.Stop()
|
2022-01-09 18:41:22 +01:00
|
|
|
|
2022-05-07 17:36:01 +02:00
|
|
|
var cronErr error
|
|
|
|
if m.stopCronJobs != nil {
|
|
|
|
cronErr = m.stopCronJobs()
|
2022-01-08 13:45:42 +01:00
|
|
|
}
|
2022-03-07 11:08:26 +01:00
|
|
|
|
2022-05-07 17:36:01 +02:00
|
|
|
if mediaErr != nil {
|
|
|
|
return mediaErr
|
|
|
|
} else if emojiErr != nil {
|
|
|
|
return emojiErr
|
2022-03-07 11:08:26 +01:00
|
|
|
}
|
2022-05-15 16:45:04 +02:00
|
|
|
|
2022-05-07 17:36:01 +02:00
|
|
|
return cronErr
|
2022-01-08 13:45:42 +01:00
|
|
|
}
|