[chore] media and emoji refactoring (#3000)

* start updating media manager interface ready for storing attachments / emoji right away

* store emoji and media as uncached immediately, then (re-)cache on Processing{}.Load()

* remove now unused media workers

* fix tests and issues

* fix another test!

* fix emoji activitypub uri setting behaviour, fix remainder of test compilation issues

* fix more tests

* fix (most of) remaining tests, add debouncing to repeatedly failing media / emojis

* whoops, rebase issue

* remove kim's whacky experiments

* do some reshuffling, ensure emoji uri gets set

* ensure marked as not cached on cleanup

* tweaks to media / emoji processing to handle context canceled better

* ensure newly fetched emojis actually get set in returned slice

* use different varnames to be a bit more obvious

* move emoji refresh rate limiting to dereferencer

* add exported dereferencer functions for remote media, use these for recaching in processor

* add check for nil attachment in updateAttachment()

* remove unused emoji and media fields + columns

* see previous commit

* fix old migrations expecting image_updated_at to exist (from copies of old models)

* remove freshness checking code (seems to be broken...)

* fix error arg causing nil ptr exception

* finish documenting functions with comments, slight tweaks to media / emoji deref error logic

* remove some extra unneeded boolean checking

* finish writing documentation (code comments) for exported media manager methods

* undo changes to migration snapshot gtsmodels, updated failing migration to have its own snapshot

* move doesColumnExist() to util.go in migrations package
This commit is contained in:
kim
2024-06-26 15:01:16 +00:00
committed by GitHub
parent fa710057c8
commit 21bb324156
48 changed files with 2578 additions and 1926 deletions

View File

@@ -19,14 +19,14 @@ package media
import (
"context"
"errors"
"fmt"
"io"
"net/url"
"strings"
"time"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/media"
@@ -38,7 +38,7 @@ import (
// to the caller via an io.reader embedded in *apimodel.Content.
func (p *Processor) GetFile(
ctx context.Context,
requestingAccount *gtsmodel.Account,
requester *gtsmodel.Account,
form *apimodel.GetContentRequestForm,
) (*apimodel.Content, gtserror.WithCode) {
// parse the form fields
@@ -69,13 +69,13 @@ func (p *Processor) GetFile(
}
// make sure the requesting account and the media account don't block each other
if requestingAccount != nil {
blocked, err := p.state.DB.IsEitherBlocked(ctx, requestingAccount.ID, owningAccountID)
if requester != nil {
blocked, err := p.state.DB.IsEitherBlocked(ctx, requester.ID, owningAccountID)
if err != nil {
return nil, gtserror.NewErrorNotFound(fmt.Errorf("block status could not be established between accounts %s and %s: %s", owningAccountID, requestingAccount.ID, err))
return nil, gtserror.NewErrorNotFound(fmt.Errorf("block status could not be established between accounts %s and %s: %s", owningAccountID, requester.ID, err))
}
if blocked {
return nil, gtserror.NewErrorNotFound(fmt.Errorf("block exists between accounts %s and %s", owningAccountID, requestingAccount.ID))
return nil, gtserror.NewErrorNotFound(fmt.Errorf("block exists between accounts %s and %s", owningAccountID, requester.ID))
}
}
@@ -83,17 +83,254 @@ func (p *Processor) GetFile(
// so we need to take different steps depending on the media type being requested
switch mediaType {
case media.TypeEmoji:
return p.getEmojiContent(ctx, wantedMediaID, owningAccountID, mediaSize)
return p.getEmojiContent(ctx,
owningAccountID,
wantedMediaID,
mediaSize,
)
case media.TypeAttachment, media.TypeHeader, media.TypeAvatar:
return p.getAttachmentContent(ctx, requestingAccount, wantedMediaID, owningAccountID, mediaSize)
return p.getAttachmentContent(ctx,
requester,
owningAccountID,
wantedMediaID,
mediaSize,
)
default:
return nil, gtserror.NewErrorNotFound(fmt.Errorf("media type %s not recognized", mediaType))
}
}
/*
UTIL FUNCTIONS
*/
// getAttachmentContent looks up the media attachment with the given
// mediaID, checks that it belongs to ownerID, and returns an API
// content model for the requested size.
//
// Attachments of type "unknown" have no locally stored data to serve,
// so they are answered with a URL pointing at their remote location.
// Every other attachment is passed through the federator's RefreshMedia
// before the file for the requested size is fetched via getContent.
//
// requester may be nil; when set, its username is handed to RefreshMedia.
//
// Returned error codes: not-found when the attachment does not exist,
// is not owned by ownerID, or cannot be refreshed; bad-request for an
// unrecognized size; internal-error for database errors and for
// invalid / missing remote URLs.
func (p *Processor) getAttachmentContent(
	ctx context.Context,
	requester *gtsmodel.Account,
	ownerID string,
	mediaID string,
	sizeStr media.Size,
) (
	*apimodel.Content,
	gtserror.WithCode,
) {
	// Search for media with given ID in the database.
	attach, err := p.state.DB.GetAttachmentByID(ctx, mediaID)
	if err != nil && !errors.Is(err, db.ErrNoEntries) {
		err := gtserror.Newf("error fetching media from database: %w", err)
		return nil, gtserror.NewErrorInternalError(err)
	}

	if attach == nil {
		const text = "media not found"
		return nil, gtserror.NewErrorNotFound(errors.New(text), text)
	}

	// Ensure the 'owner' owns media.
	if attach.AccountID != ownerID {
		const text = "media was not owned by passed account id"
		return nil, gtserror.NewErrorNotFound(errors.New(text) /* no help text! */)
	}

	var remoteURL *url.URL
	if attach.RemoteURL != "" {
		// Parse media remote URL to valid URL object.
		remoteURL, err = url.Parse(attach.RemoteURL)
		if err != nil {
			err := gtserror.Newf("invalid media remote url %s: %w", attach.RemoteURL, err)
			return nil, gtserror.NewErrorInternalError(err)
		}
	}

	// Unknown file types indicate no *locally*
	// stored data we can serve. Handle separately.
	if attach.Type == gtsmodel.FileTypeUnknown {
		if remoteURL == nil {
			// There is no underlying error on this path
			// (any earlier error has already returned),
			// so there is nothing to wrap in the message.
			err := gtserror.Newf("missing remote url for unknown type media %s", attach.ID)
			return nil, gtserror.NewErrorInternalError(err)
		}

		// If this is an "Unknown" file type, ie., one we
		// tried to process and couldn't, or one we refused
		// to process because it wasn't supported, then we
		// can skip a lot of steps here by simply forwarding
		// the request to the remote URL.
		//
		// (named 'presigned' rather than 'url' so that the
		// net/url package is not shadowed within this scope).
		presigned := &storage.PresignedURL{
			URL: remoteURL,

			// We might manage to cache the media
			// at some point, so set a low-ish expiry.
			Expiry: time.Now().Add(2 * time.Hour),
		}

		return &apimodel.Content{URL: presigned}, nil
	}

	var requestUser string
	if requester != nil {
		// Set requesting acc username.
		requestUser = requester.Username
	}

	// Ensure that stored media is cached.
	// (this handles local media / recaches).
	attach, err = p.federator.RefreshMedia(
		ctx,
		requestUser,
		attach,
		media.AdditionalMediaInfo{},
		false,
	)
	if err != nil {
		err := gtserror.Newf("error recaching media: %w", err)
		return nil, gtserror.NewErrorNotFound(err)
	}

	// Start preparing API content model.
	apiContent := &apimodel.Content{
		ContentUpdated: attach.UpdatedAt,
	}

	// Retrieve appropriate
	// size file from storage.
	switch sizeStr {
	case media.SizeOriginal:
		apiContent.ContentType = attach.File.ContentType
		apiContent.ContentLength = int64(attach.File.FileSize)
		return p.getContent(ctx,
			attach.File.Path,
			apiContent,
		)

	case media.SizeSmall:
		apiContent.ContentType = attach.Thumbnail.ContentType
		apiContent.ContentLength = int64(attach.Thumbnail.FileSize)
		return p.getContent(ctx,
			attach.Thumbnail.Path,
			apiContent,
		)

	default:
		const text = "invalid media attachment size"
		return nil, gtserror.NewErrorBadRequest(errors.New(text), text)
	}
}
// getEmojiContent resolves the emoji identified by ownerID and
// emojiID and returns an API content model for the requested size
// (original or static). Missing and disabled emojis are both
// reported as not-found; an unrecognized size is a bad request.
func (p *Processor) getEmojiContent(
	ctx context.Context,
	ownerID string,
	emojiID string,
	sizeStr media.Size,
) (
	*apimodel.Content,
	gtserror.WithCode,
) {
	// The database lookup key is the emoji's static image
	// URL, rebuilt here from the owner and emoji path ID.
	// (Refreshed emojis get a newly generated path ID, which
	// keeps them distinct cache-wise from the original.)
	lookupURL := uris.URIForAttachment(
		ownerID,
		string(media.TypeEmoji),
		string(media.SizeStatic),
		emojiID,
		"png",
	)

	// Look the emoji up by that static URL.
	emoji, err := p.state.DB.GetEmojiByStaticURL(ctx, lookupURL)

	switch {
	// A real database error, not just "no entries".
	case err != nil && !errors.Is(err, db.ErrNoEntries):
		err := gtserror.Newf("error fetching emoji from database: %w", err)
		return nil, gtserror.NewErrorInternalError(err)

	// Nothing stored under that URL.
	case emoji == nil:
		const text = "emoji not found"
		return nil, gtserror.NewErrorNotFound(errors.New(text), text)

	// Stored, but switched off.
	case *emoji.Disabled:
		const text = "emoji has been disabled"
		return nil, gtserror.NewErrorNotFound(errors.New(text), text)
	}

	// Make sure the stored emoji is cached before
	// serving (covers local emoji and recaches).
	emoji, err = p.federator.RefreshEmoji(ctx, emoji, media.AdditionalEmojiInfo{}, false)
	if err != nil {
		err := gtserror.Newf("error recaching emoji: %w", err)
		return nil, gtserror.NewErrorNotFound(err)
	}

	var (
		// Storage path of the file to serve.
		path string

		// API content model to populate.
		content = new(apimodel.Content)
	)

	// Pick file details matching the wanted size.
	switch sizeStr {
	case media.SizeOriginal:
		path = emoji.ImagePath
		content.ContentType = emoji.ImageContentType
		content.ContentLength = int64(emoji.ImageFileSize)

	case media.SizeStatic:
		path = emoji.ImageStaticPath
		content.ContentType = emoji.ImageStaticContentType
		content.ContentLength = int64(emoji.ImageStaticFileSize)

	default:
		const text = "invalid media attachment size"
		return nil, gtserror.NewErrorBadRequest(errors.New(text), text)
	}

	// Finish up by fetching from storage.
	return p.getContent(ctx, path, content)
}
// getContent completes a file request by resolving the stored
// data at path and attaching it to the given content model,
// which is then returned. The model receives either a URL, when
// storage hands one out for the path (S3 with proxying disabled),
// or an open stream of the stored file's data.
func (p *Processor) getContent(
	ctx context.Context,
	path string,
	content *apimodel.Content,
) (
	*apimodel.Content,
	gtserror.WithCode,
) {
	// When storage offers a pre-signed URL for this path,
	// pass that back rather than reading the data ourselves.
	presigned := p.state.Storage.URL(ctx, path)
	if presigned != nil {
		content.URL = presigned
		return content, nil
	}

	// Otherwise open a stream of the stored file.
	stream, err := p.state.Storage.GetStream(ctx, path)

	switch {
	// Any failure other than "not found"
	// is treated as an internal error.
	case err != nil && !storage.IsNotFound(err):
		err := gtserror.Newf("error getting file %s from storage: %w", path, err)
		return nil, gtserror.NewErrorInternalError(err)

	// No stream means no such file.
	case stream == nil:
		const text = "file not found"
		return nil, gtserror.NewErrorNotFound(errors.New(text), text)
	}

	// Hand the open stream to the caller.
	content.Content = stream
	return content, nil
}
func parseType(s string) (media.Type, error) {
switch s {
@@ -120,198 +357,3 @@ func parseSize(s string) (media.Size, error) {
}
return "", fmt.Errorf("%s not a recognized media.Size", s)
}
// getAttachmentContent fetches the attachment with ID wantedMediaID,
// verifies it is owned by owningAccountID, and returns an API content
// model for the requested mediaSize.
//
// "Unknown"-type attachments are answered with a URL pointing at the
// attachment's remote URL. Attachments not marked as cached are
// re-fetched from their remote URL through the media manager first;
// if requestingAccount is non-nil its username is used for that fetch,
// otherwise an empty username is passed to the transport controller.
func (p *Processor) getAttachmentContent(
	ctx context.Context,
	requestingAccount *gtsmodel.Account,
	wantedMediaID string,
	owningAccountID string,
	mediaSize media.Size,
) (*apimodel.Content, gtserror.WithCode) {
	// Load the attachment; any database error is reported as not-found.
	attachment, err := p.state.DB.GetAttachmentByID(ctx, wantedMediaID)
	if err != nil {
		err = gtserror.Newf("attachment %s could not be taken from the db: %w", wantedMediaID, err)
		return nil, gtserror.NewErrorNotFound(err)
	}

	// The attachment must belong to the account named in the request.
	if attachment.AccountID != owningAccountID {
		err = gtserror.Newf("attachment %s is not owned by %s", wantedMediaID, owningAccountID)
		return nil, gtserror.NewErrorNotFound(err)
	}

	// "Unknown" attachments are ones we tried and failed to
	// process, or refused to process as unsupported. For these
	// the request is simply forwarded on to the remote URL.
	if attachment.Type == gtsmodel.FileTypeUnknown {
		target, err := url.Parse(attachment.RemoteURL)
		if err != nil {
			err = gtserror.Newf("error parsing remote URL of 'Unknown'-type attachment for redirection: %w", err)
			return nil, gtserror.NewErrorInternalError(err)
		}

		return &apimodel.Content{
			URL: &storage.PresignedURL{
				URL: target,

				// Kept low-ish, as the media might
				// still get cached at some later point.
				Expiry: time.Now().Add(2 * time.Hour),
			},
		}, nil
	}

	if !*attachment.Cached {
		// Not cached: local media should never be uncached, so
		// this is taken to be remote media which must be fetched
		// again via a transport and the media manager.
		remoteIRI, err := url.Parse(attachment.RemoteURL)
		if err != nil {
			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error parsing remote media iri %s: %w", attachment.RemoteURL, err))
		}

		// An empty username selects the instance account; if the
		// request was http signed, the requesting account is used
		// for the request to the remote server instead.
		username := ""
		if requestingAccount != nil {
			username = requestingAccount.Username
		}

		// The data is deliberately not streamed to the client
		// while being stored: a slow client connection could
		// otherwise hold open the recache operation (and with
		// it a media worker).
		fetch := func(ctx context.Context) (io.ReadCloser, int64, error) {
			tsport, err := p.transportController.NewTransportForUsername(ctx, username)
			if err != nil {
				return nil, 0, err
			}
			return tsport.DereferenceMedia(gtscontext.SetFastFail(ctx), remoteIRI)
		}

		// Kick off the recache using the fetch function.
		processing, err := p.mediaManager.PreProcessMediaRecache(ctx, fetch, wantedMediaID)
		if err != nil {
			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error recaching media: %w", err))
		}

		// Block until the recached attachment is loaded.
		attachment, err = processing.LoadAttachment(ctx)
		if err != nil {
			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error loading recached attachment: %w", err))
		}
	}

	// Content model to be filled in according to size.
	served := &apimodel.Content{ContentUpdated: attachment.UpdatedAt}

	switch mediaSize {
	case media.SizeOriginal:
		served.ContentType = attachment.File.ContentType
		served.ContentLength = int64(attachment.File.FileSize)
		return p.retrieveFromStorage(ctx, attachment.File.Path, served)

	case media.SizeSmall:
		served.ContentType = attachment.Thumbnail.ContentType
		served.ContentLength = int64(attachment.Thumbnail.FileSize)
		return p.retrieveFromStorage(ctx, attachment.Thumbnail.Path, served)

	default:
		return nil, gtserror.NewErrorNotFound(fmt.Errorf("media size %s not recognized for attachment", mediaSize))
	}
}
// getEmojiContent looks up the emoji whose static image URL is
// built from owningAccountID and fileName, and returns an API
// content model for the requested emojiSize. Disabled emojis are
// reported as not-found. Emojis not marked as cached are first
// re-fetched from their remote image URL via the media manager.
func (p *Processor) getEmojiContent(
	ctx context.Context,
	fileName string,
	owningAccountID string,
	emojiSize media.Size,
) (*apimodel.Content, gtserror.WithCode) {
	// The static image URL is used as the lookup key (rather
	// than the full size URL) as static emojis are always
	// encoded as png, which makes it the more reliable choice.
	lookupURL := uris.URIForAttachment(
		owningAccountID,
		string(media.TypeEmoji),
		string(media.SizeStatic),
		fileName,
		"png",
	)

	emoji, err := p.state.DB.GetEmojiByStaticURL(ctx, lookupURL)
	if err != nil {
		return nil, gtserror.NewErrorNotFound(fmt.Errorf("emoji %s could not be taken from the db: %w", fileName, err))
	}

	if *emoji.Disabled {
		return nil, gtserror.NewErrorNotFound(fmt.Errorf("emoji %s has been disabled", fileName))
	}

	if !*emoji.Cached {
		// Not cached: local emoji should never be uncached, so
		// this is taken to be a remote emoji which must be fetched
		// again via a transport and the media manager.
		remoteIRI, err := url.Parse(emoji.ImageRemoteURL)
		if err != nil {
			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error parsing remote emoji iri %s: %w", emoji.ImageRemoteURL, err))
		}

		// Fetches emoji data using a transport
		// created for the empty username.
		fetch := func(ctx context.Context) (io.ReadCloser, int64, error) {
			tsport, err := p.transportController.NewTransportForUsername(ctx, "")
			if err != nil {
				return nil, 0, err
			}
			return tsport.DereferenceMedia(gtscontext.SetFastFail(ctx), remoteIRI)
		}

		// Kick off the recache using the fetch function.
		processing, err := p.mediaManager.PreProcessEmojiRecache(ctx, fetch, emoji.ID)
		if err != nil {
			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error recaching emoji: %w", err))
		}

		// Block until the recached emoji is loaded.
		emoji, err = processing.LoadEmoji(ctx)
		if err != nil {
			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error loading recached emoji: %w", err))
		}
	}

	// Content model to be filled in according to size.
	served := &apimodel.Content{}

	switch emojiSize {
	case media.SizeOriginal:
		served.ContentType = emoji.ImageContentType
		served.ContentLength = int64(emoji.ImageFileSize)
		return p.retrieveFromStorage(ctx, emoji.ImagePath, served)

	case media.SizeStatic:
		served.ContentType = emoji.ImageStaticContentType
		served.ContentLength = int64(emoji.ImageStaticFileSize)
		return p.retrieveFromStorage(ctx, emoji.ImageStaticPath, served)

	default:
		return nil, gtserror.NewErrorNotFound(fmt.Errorf("media size %s not recognized for emoji", emojiSize))
	}
}
// retrieveFromStorage populates content with the data stored at
// storagePath and returns it. If storage provides a URL for the
// path (S3 storage with proxying disabled), only content.URL is
// set; otherwise content.Content is set to a stream of the file.
// Any error opening the stream is returned as a not-found error.
func (p *Processor) retrieveFromStorage(ctx context.Context, storagePath string, content *apimodel.Content) (*apimodel.Content, gtserror.WithCode) {
	// If running on S3 storage with proxying disabled then
	// just fetch a pre-signed URL instead of serving the content.
	if url := p.state.Storage.URL(ctx, storagePath); url != nil {
		content.URL = url
		return content, nil
	}

	reader, err := p.state.Storage.GetStream(ctx, storagePath)
	if err != nil {
		// Wrap with %w (not %s) so that the underlying storage
		// error stays reachable through errors.Is / errors.As;
		// the rendered message text is unchanged.
		return nil, gtserror.NewErrorNotFound(fmt.Errorf("error retrieving from storage: %w", err))
	}

	content.Content = reader
	return content, nil
}