[bugfix] fix possible infinite loops in media / emoji cleanup (#2590)

* update media / emoji cleaner funcs to use new paging package, check for same returned maxID

* fix other calls of getattachments and getmojis not using paging

* use alternative order-by function
This commit is contained in:
kim 2024-01-31 13:31:53 +00:00 committed by GitHub
parent 0f7a2024c3
commit c9452f32f3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 158 additions and 119 deletions

View File

@ -31,14 +31,14 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/db/bundb"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/paging"
"github.com/superseriousbusiness/gotosocial/internal/state"
)
type list struct {
dbService db.DB
state *state.State
maxID string
limit int
page paging.Page
localOnly bool
remoteOnly bool
out *bufio.Writer
@ -47,31 +47,32 @@ type list struct {
// Get a list of attachment using a custom filter
func (l *list) GetAllAttachmentPaths(ctx context.Context, filter func(*gtsmodel.MediaAttachment) string) ([]string, error) {
res := make([]string, 0, 100)
for {
attachments, err := l.dbService.GetAttachments(ctx, l.maxID, l.limit)
if err != nil {
// Get the next page of media attachments up to max ID.
attachments, err := l.dbService.GetAttachments(ctx, &l.page)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return nil, fmt.Errorf("failed to retrieve media metadata from database: %w", err)
}
// Get current max ID.
maxID := l.page.Max.Value
// If no attachments or the same group is returned, we reached the end.
if len(attachments) == 0 || maxID == attachments[len(attachments)-1].ID {
break
}
// Use last ID as the next 'maxID' value.
maxID = attachments[len(attachments)-1].ID
l.page.Max = paging.MaxID(maxID)
for _, a := range attachments {
v := filter(a)
if v != "" {
res = append(res, v)
}
}
// If we got less results than our limit, we've reached the
// last page to retrieve and we can break the loop. If the
// last batch happens to contain exactly the same amount of
// items as the limit we'll end up doing one extra query.
if len(attachments) < l.limit {
break
}
// Grab the last ID from the batch and set it as the maxID
// that'll be used in the next iteration so we don't get items
// we've already seen.
l.maxID = attachments[len(attachments)-1].ID
}
return res, nil
}
@ -80,30 +81,30 @@ func (l *list) GetAllAttachmentPaths(ctx context.Context, filter func(*gtsmodel.
func (l *list) GetAllEmojisPaths(ctx context.Context, filter func(*gtsmodel.Emoji) string) ([]string, error) {
res := make([]string, 0, 100)
for {
attachments, err := l.dbService.GetEmojis(ctx, l.maxID, l.limit)
// Get the next page of emoji media up to max ID.
attachments, err := l.dbService.GetEmojis(ctx, &l.page)
if err != nil {
return nil, fmt.Errorf("failed to retrieve media metadata from database: %w", err)
}
// Get current max ID.
maxID := l.page.Max.Value
// If no attachments or the same group is returned, we reached the end.
if len(attachments) == 0 || maxID == attachments[len(attachments)-1].ID {
break
}
// Use last ID as the next 'maxID' value.
maxID = attachments[len(attachments)-1].ID
l.page.Max = paging.MaxID(maxID)
for _, a := range attachments {
v := filter(a)
if v != "" {
res = append(res, v)
}
}
// If we got less results than our limit, we've reached the
// last page to retrieve and we can break the loop. If the
// last batch happens to contain exactly the same amount of
// items as the limit we'll end up doing one extra query.
if len(attachments) < l.limit {
break
}
// Grab the last ID from the batch and set it as the maxID
// that'll be used in the next iteration so we don't get items
// we've already seen.
l.maxID = attachments[len(attachments)-1].ID
}
return res, nil
}
@ -137,8 +138,7 @@ func setupList(ctx context.Context) (*list, error) {
return &list{
dbService: dbService,
state: &state,
limit: 200,
maxID: "",
page: paging.Page{Limit: 200},
localOnly: localOnly,
remoteOnly: remoteOnly,
out: bufio.NewWriter(os.Stdout),

View File

@ -27,6 +27,7 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/paging"
)
// Emoji encompasses a set of
@ -105,8 +106,9 @@ func (e *Emoji) UncacheRemote(ctx context.Context, olderThan time.Time) (int, er
return total, gtserror.Newf("error getting remote emoji: %w", err)
}
if len(emojis) == 0 {
// reached end.
// If no emojis / same group is returned, we reached the end.
if len(emojis) == 0 ||
olderThan.Equal(emojis[len(emojis)-1].CreatedAt) {
break
}
@ -140,23 +142,30 @@ func (e *Emoji) UncacheRemote(ctx context.Context, olderThan time.Time) (int, er
func (e *Emoji) FixBroken(ctx context.Context) (int, error) {
var (
total int
maxID string
page paging.Page
)
// Set page select limit.
page.Limit = selectLimit
for {
// Fetch the next batch of emoji media up to next ID.
emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit)
// Fetch the next batch of emoji to next max ID.
emojis, err := e.state.DB.GetEmojis(ctx, &page)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return total, gtserror.Newf("error getting emojis: %w", err)
}
if len(emojis) == 0 {
// reached end.
// Get current max ID.
maxID := page.Max.Value
// If no emoji or the same group is returned, we reached end.
if len(emojis) == 0 || maxID == emojis[len(emojis)-1].ID {
break
}
// Use last as the next 'maxID' value.
// Use last ID as the next 'maxID'.
maxID = emojis[len(emojis)-1].ID
page.Max = paging.MaxID(maxID)
for _, emoji := range emojis {
// Check / fix missing broken emoji.
@ -182,23 +191,30 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) {
func (e *Emoji) PruneUnused(ctx context.Context) (int, error) {
var (
total int
maxID string
page paging.Page
)
// Set page select limit.
page.Limit = selectLimit
for {
// Fetch the next batch of emoji media up to next ID.
emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit)
// Fetch the next batch of emoji to next max ID.
emojis, err := e.state.DB.GetRemoteEmojis(ctx, &page)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return total, gtserror.Newf("error getting remote emojis: %w", err)
}
if len(emojis) == 0 {
// reached end.
// Get current max ID.
maxID := page.Max.Value
// If no emoji or the same group is returned, we reached end.
if len(emojis) == 0 || maxID == emojis[len(emojis)-1].ID {
break
}
// Use last as the next 'maxID' value.
// Use last ID as the next 'maxID'.
maxID = emojis[len(emojis)-1].ID
page.Max = paging.MaxID(maxID)
for _, emoji := range emojis {
// Check / prune unused emoji media.
@ -224,23 +240,30 @@ func (e *Emoji) PruneUnused(ctx context.Context) (int, error) {
func (e *Emoji) FixCacheStates(ctx context.Context) (int, error) {
var (
total int
maxID string
page paging.Page
)
// Set page select limit.
page.Limit = selectLimit
for {
// Fetch the next batch of emoji media up to next ID.
emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit)
// Fetch the next batch of emoji to next max ID.
emojis, err := e.state.DB.GetRemoteEmojis(ctx, &page)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return total, gtserror.Newf("error getting remote emojis: %w", err)
}
if len(emojis) == 0 {
// reached end.
// Get current max ID.
maxID := page.Max.Value
// If no emoji or the same group is returned, we reached end.
if len(emojis) == 0 || maxID == emojis[len(emojis)-1].ID {
break
}
// Use last as the next 'maxID' value.
// Use last ID as the next 'maxID'.
maxID = emojis[len(emojis)-1].ID
page.Max = paging.MaxID(maxID)
for _, emoji := range emojis {
// Check / fix required emoji cache states.

View File

@ -28,6 +28,7 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/paging"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/uris"
)
@ -128,23 +129,30 @@ func (m *Media) PruneOrphaned(ctx context.Context) (int, error) {
func (m *Media) PruneUnused(ctx context.Context) (int, error) {
var (
total int
maxID string
page paging.Page
)
// Set page select limit.
page.Limit = selectLimit
for {
// Fetch the next batch of media attachments up to next max ID.
attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit)
// Fetch the next batch of media attachments to next maxID.
attachments, err := m.state.DB.GetAttachments(ctx, &page)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return total, gtserror.Newf("error getting attachments: %w", err)
}
if len(attachments) == 0 {
// reached end.
// Get current max ID.
maxID := page.Max.Value
// If no attachments or the same group is returned, we reached the end.
if len(attachments) == 0 || maxID == attachments[len(attachments)-1].ID {
break
}
// Use last ID as the next 'maxID' value.
maxID = attachments[len(attachments)-1].ID
page.Max = paging.MaxID(maxID)
for _, media := range attachments {
// Check / prune unused media attachment.
@ -183,8 +191,9 @@ func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, er
return total, gtserror.Newf("error getting remote attachments: %w", err)
}
if len(attachments) == 0 {
// reached end.
// If no attachments / same group is returned, we reached the end.
if len(attachments) == 0 ||
olderThan.Equal(attachments[len(attachments)-1].CreatedAt) {
break
}
@ -215,23 +224,29 @@ func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, er
func (m *Media) FixCacheStates(ctx context.Context) (int, error) {
var (
total int
maxID string
page paging.Page
)
// Set page select limit.
page.Limit = selectLimit
for {
// Fetch the next batch of media attachments up to next max ID.
attachments, err := m.state.DB.GetRemoteAttachments(ctx, maxID, selectLimit)
attachments, err := m.state.DB.GetRemoteAttachments(ctx, &page)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return total, gtserror.Newf("error getting remote attachments: %w", err)
}
// Get current max ID.
maxID := page.Max.Value
if len(attachments) == 0 {
// reached end.
// If no attachments or the same group is returned, we reached the end.
if len(attachments) == 0 || maxID == attachments[len(attachments)-1].ID {
break
}
// Use last ID as the next 'maxID' value.
maxID = attachments[len(attachments)-1].ID
page.Max = paging.MaxID(maxID)
for _, media := range attachments {
// Check / fix required media cache states.

View File

@ -30,6 +30,7 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/paging"
"github.com/superseriousbusiness/gotosocial/internal/state"
"github.com/superseriousbusiness/gotosocial/internal/util"
"github.com/uptrace/bun"
@ -326,8 +327,11 @@ func (e *emojiDB) GetEmojisBy(ctx context.Context, domain string, includeDisable
return e.GetEmojisByIDs(ctx, emojiIDs)
}
func (e *emojiDB) GetEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) {
var emojiIDs []string
func (e *emojiDB) GetEmojis(ctx context.Context, page *paging.Page) ([]*gtsmodel.Emoji, error) {
maxID := page.GetMax()
limit := page.GetLimit()
emojiIDs := make([]string, 0, limit)
q := e.db.NewSelect().
Table("emojis").
@ -349,8 +353,11 @@ func (e *emojiDB) GetEmojis(ctx context.Context, maxID string, limit int) ([]*gt
return e.GetEmojisByIDs(ctx, emojiIDs)
}
func (e *emojiDB) GetRemoteEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) {
var emojiIDs []string
func (e *emojiDB) GetRemoteEmojis(ctx context.Context, page *paging.Page) ([]*gtsmodel.Emoji, error) {
maxID := page.GetMax()
limit := page.GetLimit()
emojiIDs := make([]string, 0, limit)
q := e.db.NewSelect().
Table("emojis").

View File

@ -27,6 +27,7 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/paging"
"github.com/superseriousbusiness/gotosocial/internal/state"
"github.com/superseriousbusiness/gotosocial/internal/util"
"github.com/uptrace/bun"
@ -232,7 +233,10 @@ func (m *mediaDB) DeleteAttachment(ctx context.Context, id string) error {
return err
}
func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) {
func (m *mediaDB) GetAttachments(ctx context.Context, page *paging.Page) ([]*gtsmodel.MediaAttachment, error) {
maxID := page.GetMax()
limit := page.GetLimit()
attachmentIDs := make([]string, 0, limit)
q := m.db.NewSelect().
@ -255,7 +259,10 @@ func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) (
return m.GetAttachmentsByIDs(ctx, attachmentIDs)
}
func (m *mediaDB) GetRemoteAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) {
func (m *mediaDB) GetRemoteAttachments(ctx context.Context, page *paging.Page) ([]*gtsmodel.MediaAttachment, error) {
maxID := page.GetMax()
limit := page.GetLimit()
attachmentIDs := make([]string, 0, limit)
q := m.db.NewSelect().

View File

@ -22,6 +22,7 @@ import (
"time"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/paging"
)
// EmojiAllDomains can be used as the `domain` value in a GetEmojis
@ -47,10 +48,10 @@ type Emoji interface {
GetUseableEmojis(ctx context.Context) ([]*gtsmodel.Emoji, error)
// GetEmojis fetches all emojis with IDs less than 'maxID', up to a maximum of 'limit' emojis.
GetEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error)
GetEmojis(ctx context.Context, page *paging.Page) ([]*gtsmodel.Emoji, error)
// GetRemoteEmojis fetches all remote emojis with IDs less than 'maxID', up to a maximum of 'limit' emojis.
GetRemoteEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error)
GetRemoteEmojis(ctx context.Context, page *paging.Page) ([]*gtsmodel.Emoji, error)
// GetCachedEmojisOlderThan fetches all cached remote emojis with 'updated_at' greater than 'olderThan', up to a maximum of 'limit' emojis.
GetCachedEmojisOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.Emoji, error)

View File

@ -22,6 +22,7 @@ import (
"time"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/paging"
)
// Media contains functions related to creating/getting/removing media attachments.
@ -42,10 +43,10 @@ type Media interface {
DeleteAttachment(ctx context.Context, id string) error
// GetAttachments fetches media attachments up to a given max ID, and at most limit.
GetAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error)
GetAttachments(ctx context.Context, page *paging.Page) ([]*gtsmodel.MediaAttachment, error)
// GetRemoteAttachments fetches media attachments with a non-empty domain, up to a given max ID, and at most limit.
GetRemoteAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error)
GetRemoteAttachments(ctx context.Context, page *paging.Page) ([]*gtsmodel.MediaAttachment, error)
// GetCachedAttachmentsOlderThan gets limit n remote attachments (including avatars and headers) older than
// the given time. These will be returned in order of attachment.created_at descending (i.e. newest to oldest).

View File

@ -17,6 +17,8 @@
package util
import "slices"
// Deduplicate deduplicates entries in the given slice.
func Deduplicate[T comparable](in []T) []T {
var (
@ -47,6 +49,10 @@ func DeduplicateFunc[T any, C comparable](in []T, key func(v T) C) []T {
deduped = make([]T, 0, inL)
)
if key == nil {
panic("nil func")
}
for _, v := range in {
k := key(v)
@ -66,6 +72,10 @@ func DeduplicateFunc[T any, C comparable](in []T, key func(v T) C) []T {
// passing each item to 'get' and deduplicating the end result.
// Compared to Deduplicate() this returns []K, NOT input type []T.
func Collate[T any, K comparable](in []T, get func(T) K) []K {
if get == nil {
panic("nil func")
}
ks := make([]K, 0, len(in))
km := make(map[K]struct{}, len(in))
@ -86,50 +96,25 @@ func Collate[T any, K comparable](in []T, get func(T) K) []K {
// OrderBy orders a slice of given type by the provided alternative slice of comparable type.
func OrderBy[T any, K comparable](in []T, keys []K, key func(T) K) {
var (
start int
offset int
)
for i := 0; i < len(keys); i++ {
var (
// key at index.
k = keys[i]
// sentinel
// idx value.
idx = -1
)
// Look for model with key in slice.
for j := start; j < len(in); j++ {
if key(in[j]) == k {
idx = j
break
}
}
if idx == -1 {
// model with key
// was not found.
offset++
continue
}
// Update
// start
start++
// Expected ID index.
exp := i - offset
if idx == exp {
// Model is in expected
// location, keep going.
continue
}
// Swap models at current and expected.
in[idx], in[exp] = in[exp], in[idx]
if key == nil {
panic("nil func")
}
// Create lookup of keys->idx.
m := make(map[K]int, len(in))
for i, k := range keys {
m[k] = i
}
// Sort according to the reverse lookup.
slices.SortFunc(in, func(a, b T) int {
ai := m[key(a)]
bi := m[key(b)]
if ai < bi {
return -1
} else if bi < ai {
return +1
}
return 0
})
}