[performance] rewrite timelines to rely on new timeline cache type (#3941)

* start work rewriting timeline cache type

* further work rewriting timeline caching

* more work integrating new timeline code

* remove old code

* add local timeline, fix up merge conflicts

* remove old use of go-bytes

* implement new timeline code into more areas of codebase, pull in latest go-mangler, go-mutexes, go-structr

* remove old timeline package, add local timeline cache

* remove references to old timeline types that needed starting up in tests

* start adding page validation

* fix test-identified timeline cache package issues

* fix up more tests, fix missing required changes, etc

* add exclusion for test.out in gitignore

* clarify some things better in code comments

* tweak cache size limits

* fix list timeline cache fetching

* further list timeline fixes

* linter, ssssssssshhhhhhhhhhhh please

* fix linter hints

* reslice the output if it's beyond length of 'lim'

* remove old timeline initialization code, bump go-structr to v0.9.4

* continued from previous commit

* improved code comments

* don't allow multiple entries for BoostOfID values to prevent repeated boosts of same boosts

* finish writing more code comments

* some variable renaming, for ease of following

* change the way we update lo,hi paging values during timeline load

* improved code comments for updated / returned lo , hi paging values

* finish writing code comments for the StatusTimeline{} type itself

* fill in more code comments

* update go-structr version to latest with changed timeline unique indexing logic

* have a local and public timeline *per user*

* rewrite calls to public / local timeline calls

* remove the zero length check, as lo, hi values might still be set

* simplify timeline cache loading, fix lo/hi returns, fix timeline invalidation side-effects missing for some federated actions

* swap the lo, hi values 🤦

* add (now) missing slice reverse of tag timeline statuses when paging ASC

* remove local / public caches (is out of scope for this work), share more timeline code

* remove unnecessary change

* again, remove more unused code

* remove unused function to appease the linter

* move boost checking to prepare function

* fix use of timeline.lastOrder, fix incorrect range functions used

* remove comments for repeat code

* remove the boost logic from prepare function

* do a maximum of 5 loads, not 10

* add repeat boost filtering logic, update go-structr, general improvements

* more code comments

* add important note

* fix timeline tests now that timelines are returned in page order

* remove unused field

* add StatusTimeline{} tests

* add more status timeline tests

* start adding preloading support

* ensure repeat boosts are marked in preloaded entries

* share a bunch of the database load code in timeline cache, don't clear timelines on relationship change

* add logic to allow dynamic clear / preloading of timelines

* comment-out unused functions, but leave in place as we might end-up using them

* fix timeline preload state check

* much improved status timeline code comments

* more code comments, don't bother inserting statuses if timeline not preloaded

* shift around some logic to make sure things aren't accidentally left set

* finish writing code comments

* remove trim-after-insert behaviour

* fix-up some comments referring to old logic

* remove unsetting of lo, hi

* fix preload repeatBoost checking logic

* don't return on status filter errors, these are usually transient

* better concurrency safety in Clear() and Done()

* fix test broken due to addition of preloader

* fix repeatBoost logic that doesn't account for already-hidden repeatBoosts

* ensure edit submodels are dropped on cache insertion

* update code-comment to expand CAS acronym

* use a plus1hULID() instead of 24h

* remove unused functions

* add note that public / local timeline requester can be nil

* fix incorrect visibility filtering of tag timeline statuses

* ensure we filter home timeline statuses on local only

* some small re-orderings to confirm query params in correct places

* fix the local only home timeline filter func
This commit is contained in:
kim
2025-04-26 09:56:15 +00:00
committed by GitHub
parent ffde1b150f
commit 6a6a499333
108 changed files with 2935 additions and 5213 deletions

View File

@@ -19,152 +19,143 @@ package timeline
import (
"context"
"errors"
"strconv"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/db"
statusfilter "github.com/superseriousbusiness/gotosocial/internal/filter/status"
"github.com/superseriousbusiness/gotosocial/internal/filter/usermute"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/util"
"github.com/superseriousbusiness/gotosocial/internal/paging"
)
// PublicTimelineGet gets a pageable timeline of public statuses
// for the given requesting account. It ensures that each status
// in timeline is visible to the account before returning it.
//
// The local argument limits this to local-only statuses.
//
// NOTE(review): this span appears to interleave the removed and added
// lines of a diff without +/- markers: the parameter list carries both
// maxID/sinceID/minID/limit and page, the result list is opened twice,
// and a second return follows the PackagePageableResponse return.
// Presumably the newer form is the one that only dispatches on local
// to localTimelineGet / publicTimelineGet — confirm against the
// repository before relying on either reading.
func (p *Processor) PublicTimelineGet(
ctx context.Context,
requester *gtsmodel.Account,
maxID string,
sinceID string,
minID string,
limit int,
page *paging.Page,
local bool,
) (*apimodel.PageableResponse, gtserror.WithCode) {
const maxAttempts = 3
var (
nextMaxIDValue string
prevMinIDValue string
items = make([]any, 0, limit)
)
var filters []*gtsmodel.Filter
var compiledMutes *usermute.CompiledUserMuteList
if requester != nil {
var err error
filters, err = p.state.DB.GetFiltersForAccountID(ctx, requester.ID)
if err != nil {
err = gtserror.Newf("couldn't retrieve filters for account %s: %w", requester.ID, err)
return nil, gtserror.NewErrorInternalError(err)
}
mutes, err := p.state.DB.GetAccountMutes(gtscontext.SetBarebones(ctx), requester.ID, nil)
if err != nil {
err = gtserror.Newf("couldn't retrieve mutes for account %s: %w", requester.ID, err)
return nil, gtserror.NewErrorInternalError(err)
}
compiledMutes = usermute.NewCompiledUserMuteList(mutes)
) (
*apimodel.PageableResponse,
gtserror.WithCode,
) {
if local {
return p.localTimelineGet(ctx, requester, page)
}
// Try a few times to select appropriate public
// statuses from the db, paging up or down to
// reattempt if nothing suitable is found.
outer:
for attempts := 1; ; attempts++ {
// Select slightly more than the limit to try to avoid situations where
// we filter out all the entries, and have to make another db call.
// It's cheaper to select more in 1 query than it is to do multiple queries.
statuses, err := p.state.DB.GetPublicTimeline(ctx, maxID, sinceID, minID, limit+5, local)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
err = gtserror.Newf("db error getting statuses: %w", err)
return nil, gtserror.NewErrorInternalError(err)
}
count := len(statuses)
if count == 0 {
// Nothing relevant (left) in the db.
return util.EmptyPageableResponse(), nil
}
// Page up from first status in slice
// (ie., one with the highest ID).
prevMinIDValue = statuses[0].ID
inner:
for _, s := range statuses {
// Push back the next page down ID to
// this status, regardless of whether
// we end up filtering it out or not.
nextMaxIDValue = s.ID
timelineable, err := p.visFilter.StatusPublicTimelineable(ctx, requester, s)
if err != nil {
log.Errorf(ctx, "error checking status visibility: %v", err)
continue inner
}
if !timelineable {
continue inner
}
apiStatus, err := p.converter.StatusToAPIStatus(ctx, s, requester, statusfilter.FilterContextPublic, filters, compiledMutes)
if errors.Is(err, statusfilter.ErrHideStatus) {
continue
}
if err != nil {
log.Errorf(ctx, "error converting to api status: %v", err)
continue inner
}
// Looks good, add this.
items = append(items, apiStatus)
// We called the db with a little
// more than the desired limit.
//
// Ensure we don't return more
// than the caller asked for.
if len(items) == limit {
break outer
}
}
if len(items) != 0 {
// We've got some items left after
// filtering, happily break + return.
break
}
if attempts >= maxAttempts {
// We reached our attempts limit.
// Be nice + warn about it.
log.Warn(ctx, "reached max attempts to find items in public timeline")
break
}
// We filtered out all items before we
// found anything we could return, but
// we still have attempts left to try
// fetching again. Set paging params
// and allow loop to continue.
if minID != "" {
// Paging up.
minID = prevMinIDValue
} else {
// Paging down.
maxID = nextMaxIDValue
}
}
return util.PackagePageableResponse(util.PageableResponseParams{
Items: items,
Path: "/api/v1/timelines/public",
NextMaxIDValue: nextMaxIDValue,
PrevMinIDValue: prevMinIDValue,
Limit: limit,
ExtraQueryParams: []string{
"local=" + strconv.FormatBool(local),
},
})
return p.publicTimelineGet(ctx, requester, page)
}
// publicTimelineGet returns a page of the public timeline for the
// given requester. It delegates to getStatusTimeline, passing no
// timeline cache, the public timeline endpoint path, the
// localOnlyFalse page query and the public status filter context.
func (p *Processor) publicTimelineGet(
	ctx context.Context,
	requester *gtsmodel.Account,
	page *paging.Page,
) (
	*apimodel.PageableResponse,
	gtserror.WithCode,
) {
	// Database load function: fetches the
	// requested page of the public timeline.
	loadPage := func(pg *paging.Page) ([]*gtsmodel.Status, error) {
		return p.state.DB.GetPublicTimeline(ctx, pg)
	}

	// Pre-filtering function, i.e. applied before caching.
	// Returns true when the status is NOT public-timelineable
	// for the requester. A visibility check error is logged
	// only; the check's boolean result is still negated and
	// returned.
	notTimelineable := func(status *gtsmodel.Status) bool {
		visible, err := p.visFilter.StatusPublicTimelineable(ctx, requester, status)
		if err != nil {
			log.Errorf(ctx, "error filtering status %s: %v", status.URI, err)
		}
		return !visible
	}

	return p.getStatusTimeline(ctx,

		// Auth account,
		// can be nil.
		requester,

		// No timeline cache.
		nil,

		// Current page.
		page,

		// Public timeline endpoint.
		"/api/v1/timelines/public",

		// Local-only page query flag set to
		// false (this map later gets copied
		// before any further usage).
		localOnlyFalse,

		// Status filter context.
		statusfilter.FilterContextPublic,

		// Load + pre-filter functions.
		loadPage,
		notTimelineable,

		// No post-filtering function,
		// i.e. nothing filtered after caching.
		nil,
	)
}
// localTimelineGet returns a page of the local-only timeline for
// the given requester. It delegates to getStatusTimeline, passing
// no timeline cache, the public timeline endpoint path, the
// localOnlyTrue page query and the public status filter context.
func (p *Processor) localTimelineGet(
	ctx context.Context,
	requester *gtsmodel.Account,
	page *paging.Page,
) (
	*apimodel.PageableResponse,
	gtserror.WithCode,
) {
	// Database load function: fetches the
	// requested page of the local timeline.
	loadPage := func(pg *paging.Page) ([]*gtsmodel.Status, error) {
		return p.state.DB.GetLocalTimeline(ctx, pg)
	}

	// Pre-filtering function, i.e. applied before caching.
	// Returns true when the status is NOT public-timelineable
	// for the requester. A visibility check error is logged
	// only; the check's boolean result is still negated and
	// returned.
	notTimelineable := func(status *gtsmodel.Status) bool {
		visible, err := p.visFilter.StatusPublicTimelineable(ctx, requester, status)
		if err != nil {
			log.Errorf(ctx, "error filtering status %s: %v", status.URI, err)
		}
		return !visible
	}

	return p.getStatusTimeline(ctx,

		// Auth account,
		// can be nil.
		requester,

		// No timeline cache.
		nil,

		// Current page.
		page,

		// Local timelines are served from
		// the public timeline endpoint.
		"/api/v1/timelines/public",

		// Local-only page query flag set to
		// true (this map later gets copied
		// before any further usage).
		localOnlyTrue,

		// Status filter context.
		statusfilter.FilterContextPublic,

		// Load + pre-filter functions.
		loadPage,
		notTimelineable,

		// No post-filtering function,
		// i.e. nothing filtered after caching.
		nil,
	)
}