[feature] support processing of (many) more media types (#3090)

* initial work replacing our media decoding / encoding pipeline with ffprobe + ffmpeg

* specify the video codec to use when generating static image from emoji

* update go-storage library (fixes incompatibility after updating go-iotools)

* maintain image aspect ratio when generating a thumbnail for it

* update readme to show go-ffmpreg

* fix a bunch of media tests, move filesize checking to callers of media manager for more flexibility

* remove extra debug from error message

* fix up incorrect function signatures

* update PutFile to just use a regular file copy, as chances are the file is on a separate partition

* fix remaining tests, remove some unneeded tests now we're working with ffmpeg/ffprobe

* update more tests, add more code comments

* add utilities to generate processed emoji / media outputs

* fix remaining tests

* add test for opus media file, add license header to utility cmds

* limit the number of concurrently available ffmpeg / ffprobe instances

* reduce number of instances

* further reduce number of instances

* fix envparsing test with configuration variables

* update docs and configuration with new media-{local,remote}-max-size variables
This commit is contained in:
kim
2024-07-12 09:39:47 +00:00
committed by GitHub
parent 5bc567196b
commit cde2fb6244
376 changed files with 8026 additions and 54091 deletions

View File

@@ -24,7 +24,7 @@ import (
"io"
"mime/multipart"
"codeberg.org/gruf/go-bytesize"
"codeberg.org/gruf/go-iotools"
"github.com/superseriousbusiness/gotosocial/internal/ap"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/config"
@@ -365,21 +365,31 @@ func (p *Processor) UpdateAvatar(
*gtsmodel.MediaAttachment,
gtserror.WithCode,
) {
max := config.GetMediaImageMaxSize()
if sz := bytesize.Size(avatar.Size); sz > max {
text := fmt.Sprintf("size %s exceeds max media size %s", sz, max)
// Get maximum supported local media size.
maxsz := config.GetMediaLocalMaxSize()
// Ensure media within size bounds.
if avatar.Size > int64(maxsz) {
text := fmt.Sprintf("media exceeds configured max size: %s", maxsz)
return nil, gtserror.NewErrorBadRequest(errors.New(text), text)
}
data := func(_ context.Context) (io.ReadCloser, int64, error) {
f, err := avatar.Open()
return f, avatar.Size, err
// Open multipart file reader.
mpfile, err := avatar.Open()
if err != nil {
err := gtserror.Newf("error opening multipart file: %w", err)
return nil, gtserror.NewErrorInternalError(err)
}
// Wrap the multipart file reader to ensure it is limited to max.
rc, _, _ := iotools.UpdateReadCloserLimit(mpfile, int64(maxsz))
// Write to instance storage.
return p.c.StoreLocalMedia(ctx,
account.ID,
data,
func(ctx context.Context) (reader io.ReadCloser, err error) {
return rc, nil
},
media.AdditionalMediaInfo{
Avatar: util.Ptr(true),
Description: description,
@@ -400,21 +410,31 @@ func (p *Processor) UpdateHeader(
*gtsmodel.MediaAttachment,
gtserror.WithCode,
) {
max := config.GetMediaImageMaxSize()
if sz := bytesize.Size(header.Size); sz > max {
text := fmt.Sprintf("size %s exceeds max media size %s", sz, max)
// Get maximum supported local media size.
maxsz := config.GetMediaLocalMaxSize()
// Ensure media within size bounds.
if header.Size > int64(maxsz) {
text := fmt.Sprintf("media exceeds configured max size: %s", maxsz)
return nil, gtserror.NewErrorBadRequest(errors.New(text), text)
}
data := func(_ context.Context) (io.ReadCloser, int64, error) {
f, err := header.Open()
return f, header.Size, err
// Open multipart file reader.
mpfile, err := header.Open()
if err != nil {
err := gtserror.Newf("error opening multipart file: %w", err)
return nil, gtserror.NewErrorInternalError(err)
}
// Wrap the multipart file reader to ensure it is limited to max.
rc, _, _ := iotools.UpdateReadCloserLimit(mpfile, int64(maxsz))
// Write to instance storage.
return p.c.StoreLocalMedia(ctx,
account.ID,
data,
func(ctx context.Context) (reader io.ReadCloser, err error) {
return rc, nil
},
media.AdditionalMediaInfo{
Header: util.Ptr(true),
Description: description,

View File

@@ -25,7 +25,10 @@ import (
"mime/multipart"
"strings"
"codeberg.org/gruf/go-bytesize"
"codeberg.org/gruf/go-iotools"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
@@ -41,10 +44,26 @@ func (p *Processor) EmojiCreate(
form *apimodel.EmojiCreateRequest,
) (*apimodel.Emoji, gtserror.WithCode) {
// Simply read provided form data for emoji data source.
data := func(_ context.Context) (io.ReadCloser, int64, error) {
f, err := form.Image.Open()
return f, form.Image.Size, err
// Get maximum supported local emoji size.
maxsz := config.GetMediaEmojiLocalMaxSize()
// Ensure media within size bounds.
if form.Image.Size > int64(maxsz) {
text := fmt.Sprintf("emoji exceeds configured max size: %s", maxsz)
return nil, gtserror.NewErrorBadRequest(errors.New(text), text)
}
// Open multipart file reader.
mpfile, err := form.Image.Open()
if err != nil {
err := gtserror.Newf("error opening multipart file: %w", err)
return nil, gtserror.NewErrorInternalError(err)
}
// Wrap the multipart file reader to ensure it is limited to max.
rc, _, _ := iotools.UpdateReadCloserLimit(mpfile, int64(maxsz))
data := func(context.Context) (io.ReadCloser, error) {
return rc, nil
}
// Attempt to create the new local emoji.
@@ -285,14 +304,23 @@ func (p *Processor) emojiUpdateCopy(
return nil, gtserror.NewErrorNotFound(err)
}
// Get maximum supported local emoji size.
maxsz := config.GetMediaEmojiLocalMaxSize()
// Ensure target emoji image within size bounds.
if bytesize.Size(target.ImageFileSize) > maxsz {
text := fmt.Sprintf("emoji exceeds configured max size: %s", maxsz)
return nil, gtserror.NewErrorBadRequest(errors.New(text), text)
}
// Data function for copying just streams media
// out of storage into an additional location.
//
// This means that data for the copy persists even
// if the remote copied emoji gets deleted at some point.
data := func(ctx context.Context) (io.ReadCloser, int64, error) {
data := func(ctx context.Context) (io.ReadCloser, error) {
rc, err := p.state.Storage.GetStream(ctx, target.ImagePath)
return rc, int64(target.ImageFileSize), err
return rc, err
}
// Attempt to create the new local emoji.
@@ -413,10 +441,26 @@ func (p *Processor) emojiUpdateModify(
// Updating image and maybe categoryID.
// We can do both at the same time :)
// Simply read provided form data for emoji data source.
data := func(_ context.Context) (io.ReadCloser, int64, error) {
f, err := image.Open()
return f, image.Size, err
// Get maximum supported local emoji size.
maxsz := config.GetMediaEmojiLocalMaxSize()
// Ensure media within size bounds.
if image.Size > int64(maxsz) {
text := fmt.Sprintf("emoji exceeds configured max size: %s", maxsz)
return nil, gtserror.NewErrorBadRequest(errors.New(text), text)
}
// Open multipart file reader.
mpfile, err := image.Open()
if err != nil {
err := gtserror.Newf("error opening multipart file: %w", err)
return nil, gtserror.NewErrorInternalError(err)
}
// Wrap the multipart file reader to ensure it is limited to max.
rc, _, _ := iotools.UpdateReadCloserLimit(mpfile, int64(maxsz))
data := func(context.Context) (io.ReadCloser, error) {
return rc, nil
}
// Prepare emoji model for recache from new data.

View File

@@ -21,6 +21,7 @@ import (
"context"
"fmt"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
@@ -35,8 +36,9 @@ func (p *Processor) MediaRefetch(ctx context.Context, requestingAccount *gtsmode
}
go func() {
ctx := gtscontext.WithValues(context.Background(), ctx)
log.Info(ctx, "starting emoji refetch")
refetched, err := p.media.RefetchEmojis(context.Background(), domain, transport.DereferenceMedia)
refetched, err := p.media.RefetchEmojis(ctx, domain, transport.DereferenceMedia)
if err != nil {
log.Errorf(ctx, "error refetching emojis: %s", err)
} else {

View File

@@ -19,10 +19,13 @@ package media
import (
"context"
"errors"
"fmt"
"io"
"codeberg.org/gruf/go-iotools"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/media"
@@ -30,21 +33,39 @@ import (
// Create creates a new media attachment belonging to the given account, using the request form.
func (p *Processor) Create(ctx context.Context, account *gtsmodel.Account, form *apimodel.AttachmentRequest) (*apimodel.Attachment, gtserror.WithCode) {
data := func(_ context.Context) (io.ReadCloser, int64, error) {
f, err := form.File.Open()
return f, form.File.Size, err
// Get maximum supported local media size.
maxsz := config.GetMediaLocalMaxSize()
// Ensure media within size bounds.
if form.File.Size > int64(maxsz) {
text := fmt.Sprintf("media exceeds configured max size: %s", maxsz)
return nil, gtserror.NewErrorBadRequest(errors.New(text), text)
}
// Parse focus details from API form input.
focusX, focusY, err := parseFocus(form.Focus)
if err != nil {
err := fmt.Errorf("could not parse focus value %s: %s", form.Focus, err)
return nil, gtserror.NewErrorBadRequest(err, err.Error())
text := fmt.Sprintf("could not parse focus value %s: %s", form.Focus, err)
return nil, gtserror.NewErrorBadRequest(errors.New(text), text)
}
// Open multipart file reader.
mpfile, err := form.File.Open()
if err != nil {
err := gtserror.Newf("error opening multipart file: %w", err)
return nil, gtserror.NewErrorInternalError(err)
}
// Wrap the multipart file reader to ensure it is limited to max.
rc, _, _ := iotools.UpdateReadCloserLimit(mpfile, int64(maxsz))
// Create local media and write to instance storage.
attachment, errWithCode := p.c.StoreLocalMedia(ctx,
account.ID,
data,
func(ctx context.Context) (reader io.ReadCloser, err error) {
return rc, nil
},
media.AdditionalMediaInfo{
Description: &form.Description,
FocusX: &focusX,

View File

@@ -18,7 +18,6 @@
package media_test
import (
"bytes"
"context"
"io"
"path"
@@ -87,9 +86,9 @@ func (suite *GetFileTestSuite) TestGetRemoteFileUncached() {
MediaSize: string(media.SizeOriginal),
FileName: fileName,
})
suite.NoError(errWithCode)
suite.NotNil(content)
b, err := io.ReadAll(content.Content)
suite.NoError(err)
suite.NoError(content.Content.Close())
@@ -111,7 +110,7 @@ func (suite *GetFileTestSuite) TestGetRemoteFileUncached() {
suite.True(*dbAttachment.Cached)
// the file should be back in storage at the same path as before
refreshedBytes, err := suite.storage.Get(ctx, testAttachment.File.Path)
refreshedBytes, err := suite.storage.Get(ctx, dbAttachment.File.Path)
suite.NoError(err)
suite.Equal(suite.testRemoteAttachments[testAttachment.RemoteURL].Data, refreshedBytes)
}
@@ -139,32 +138,26 @@ func (suite *GetFileTestSuite) TestGetRemoteFileUncachedInterrupted() {
MediaSize: string(media.SizeOriginal),
FileName: fileName,
})
suite.NoError(errWithCode)
suite.NotNil(content)
// only read the first kilobyte and then stop
b := make([]byte, 0, 1024)
if !testrig.WaitFor(func() bool {
read, err := io.CopyN(bytes.NewBuffer(b), content.Content, 1024)
return err == nil && read == 1024
}) {
suite.FailNow("timed out trying to read first 1024 bytes")
}
_, err = io.CopyN(io.Discard, content.Content, 1024)
suite.NoError(err)
// close the reader
suite.NoError(content.Content.Close())
err = content.Content.Close()
suite.NoError(err)
// the attachment should still be updated in the database even though the caller hung up
var dbAttachment *gtsmodel.MediaAttachment
if !testrig.WaitFor(func() bool {
dbAttachment, _ := suite.db.GetAttachmentByID(ctx, testAttachment.ID)
dbAttachment, _ = suite.db.GetAttachmentByID(ctx, testAttachment.ID)
return *dbAttachment.Cached
}) {
suite.FailNow("timed out waiting for attachment to be updated")
}
// the file should be back in storage at the same path as before
refreshedBytes, err := suite.storage.Get(ctx, testAttachment.File.Path)
refreshedBytes, err := suite.storage.Get(ctx, dbAttachment.File.Path)
suite.NoError(err)
suite.Equal(suite.testRemoteAttachments[testAttachment.RemoteURL].Data, refreshedBytes)
}
@@ -196,9 +189,9 @@ func (suite *GetFileTestSuite) TestGetRemoteFileThumbnailUncached() {
MediaSize: string(media.SizeSmall),
FileName: fileName,
})
suite.NoError(errWithCode)
suite.NotNil(content)
b, err := io.ReadAll(content.Content)
suite.NoError(err)
suite.NoError(content.Content.Close())