[performance] overhaul struct (+ result) caching library for simplicity, performance and multiple-result lookups (#2535)

* rewrite cache library as codeberg.org/gruf/go-structr, implement in gotosocial

* use actual go-structr release version (not just commit hash)

* revert go toolchain changes (damn you go for auto changing this)

* fix go mod woes

* ensure %w is used in calls to errs.Appendf()

* fix error checking

* fix possible panic

* remove unnecessary start/stop functions, move to main Cache{} struct, add note regarding which caches require start/stop

* fix copy-paste artifact... 😇

* fix all comment copy-paste artifacts

* remove dropID() function, now we can just use slices.DeleteFunc()

* use util.Deduplicate() instead of collate(), move collate to util

* move orderByIDs() to util package and "generify"

* add a util.DeleteIf() function, use this to delete entries on failed population

* use slices.DeleteFunc() instead of util.DeleteIf() (i had the logic mixed up in my head somehow lol)

* add note about how collate differs from deduplicate
This commit is contained in:
kim
2024-01-19 12:57:29 +00:00
committed by GitHub
parent 67e11a1a61
commit 7ec1e1332e
66 changed files with 4038 additions and 2711 deletions

View File

@@ -1,433 +0,0 @@
package result
import (
"context"
"reflect"
_ "unsafe"
"codeberg.org/gruf/go-cache/v3/simple"
"codeberg.org/gruf/go-errors/v2"
)
// Lookup describes one way of looking up a cached struct value: the set
// of struct fields whose values, taken together, form the cache key.
type Lookup struct {
// Name is a period ('.') separated string of the
// struct field names that make up this lookup key.
Name string
// AllowZero indicates whether to accept and cache
// under zero value keys, otherwise ignore them.
AllowZero bool
// Multi allows specifying a key capable of storing
// multiple results. Such a lookup cannot be passed to
// Load (it panics), and Has only reports keys holding
// exactly one result, so it is only useful for Invalidate.
Multi bool
}
// Cache provides a means of caching value structures, along with
// the results of attempting to load them. An example usecase of this
// cache would be in wrapping a database, allowing caching of sql.ErrNoRows.
//
// Results are held in the underlying cache under an int64 primary key; each
// configured lookup keeps its own map from key string to primary key(s).
type Cache[T any] struct {
cache simple.Cache[int64, *result] // underlying result cache, keyed by primary key
lookups structKeys // pre-determined struct lookups
invalid func(T) // unwrapped invalidate hook; also called by Store after a successful store
ignore func(error) bool // reports errors that must NOT be cached
copy func(T) T // copies a value before it is stored / returned
next int64 // next primary key to hand out (incremented by store)
}
// New returns a new initialized Cache for struct type T (or pointer(s) to a
// struct type), prepared with the given lookups, value copy function and
// capacity. Default (no-op) eviction / invalidation hooks and the default
// ignored-error check are installed.
//
// Panics if T is not a struct or pointer-to-struct type, or if any lookup
// names a field that cannot be used as a key.
func New[T any](lookups []Lookup, copy func(T) T, cap int) *Cache[T] {
	// Resolve the underlying struct type
	// of T, unwrapping any pointer levels.
	var zero T
	typ := reflect.TypeOf(zero)
	for typ.Kind() == reflect.Pointer {
		typ = typ.Elem()
	}
	if typ.Kind() != reflect.Struct {
		panic("generic parameter type must be struct (or ptr to)")
	}

	// Build the per-lookup key information up-front.
	keys := make([]structKey, 0, len(lookups))
	for _, lookup := range lookups {
		keys = append(keys, newStructKey(lookup, typ))
	}

	c := new(Cache[T])
	c.copy = copy
	c.lookups = keys

	// Prepare the underlying cache, then install
	// the default hooks and error filter.
	c.cache.Init(0, cap)
	c.SetEvictionCallback(nil)
	c.SetInvalidateCallback(nil)
	c.IgnoreErrors(nil)

	return c
}
// SetEvictionCallback sets the eviction callback to the provided hook.
// A nil hook is replaced with a no-op. The hook is only called for
// evicted results that hold a value; cached error results are skipped.
func (c *Cache[T]) SetEvictionCallback(hook func(T)) {
	if hook == nil {
		// Guarantee a callable hook.
		hook = func(T) {}
	}

	onEvict := func(_ int64, res *result) {
		// Remove every key -> primary key
		// lookup registered for this result.
		c.cache.Lock()
		for i := range res.Keys {
			k := &res.Keys[i]
			delete(k.info.pkeys, k.key)
		}
		c.cache.Unlock()

		// Pull the value out (if any) BEFORE the
		// result is reset and returned to the pool.
		var value T
		hasValue := res.Error == nil
		if hasValue {
			value = res.Value.(T)
		}
		putResult(res)

		if hasValue {
			hook(value)
		}
	}

	c.cache.SetEvictionCallback(onEvict)
}
// SetInvalidateCallback sets the invalidate callback to the provided hook.
// A nil hook is replaced with a no-op. The (non-nil) hook is also kept on
// the Cache itself so that it can be called directly. The wrapped hook is
// only called for invalidated results that hold a value; cached error
// results are skipped.
func (c *Cache[T]) SetInvalidateCallback(hook func(T)) {
	if hook == nil {
		// Guarantee a callable hook.
		hook = func(T) {}
	}

	// Keep the unwrapped hook.
	c.invalid = hook

	onInvalidate := func(_ int64, res *result) {
		// Remove every key -> primary key
		// lookup registered for this result.
		c.cache.Lock()
		for i := range res.Keys {
			k := &res.Keys[i]
			delete(k.info.pkeys, k.key)
		}
		c.cache.Unlock()

		// Pull the value out (if any) BEFORE the
		// result is reset and returned to the pool.
		var value T
		hasValue := res.Error == nil
		if hasValue {
			value = res.Value.(T)
		}
		putResult(res)

		if hasValue {
			hook(value)
		}
	}

	c.cache.SetInvalidateCallback(onInvalidate)
}
// IgnoreErrors sets the function used to decide which load errors are NOT
// cached: an error for which ignore returns true is passed straight back to
// the caller without being stored. Passing nil installs the default, which
// ignores context.Canceled and context.DeadlineExceeded.
func (c *Cache[T]) IgnoreErrors(ignore func(error) bool) {
	if ignore == nil {
		ignore = func(err error) bool {
			for _, target := range [...]error{
				context.Canceled,
				context.DeadlineExceeded,
			} {
				if errors.Is(err, target) {
					return true
				}
			}
			return false
		}
	}

	// Swap in the new check under the cache lock.
	c.cache.Lock()
	c.ignore = ignore
	c.cache.Unlock()
}
// Load will attempt to load an existing result from the cache for the given
// lookup and key parts, else it calls the provided load function and caches
// the result. Panics if the lookup name is unknown, if the number of key
// parts does not match the lookup, or if the lookup is a Multi lookup.
func (c *Cache[T]) Load(lookup string, load func() (T, error), keyParts ...any) (T, error) {
	sk := c.lookups.get(lookup)
	cacheKey := sk.genKey(keyParts)
	return c.load(sk, cacheKey, load)
}
// Has reports whether the cache holds a positive (non-error) result under
// the given lookup and key parts. Panics if the lookup name is unknown or
// the number of key parts does not match the lookup.
func (c *Cache[T]) Has(lookup string, keyParts ...any) bool {
	sk := c.lookups.get(lookup)
	cacheKey := sk.genKey(keyParts)
	return c.has(sk, cacheKey)
}
// Store will call the given store function, and on success store a copy of
// the value in the cache as a positive result. If store returns an error it
// is returned as-is and nothing is cached. After a successful store any
// results displaced from the cache are passed to the eviction hook, and the
// invalidate hook is called with value.
func (c *Cache[T]) Store(value T, store func() error) error {
	// Nothing gets cached unless
	// the store itself succeeds.
	err := store()
	if err != nil {
		return err
	}

	// Build the positive result to cache.
	res := getResult()
	res.Keys = c.lookups.generate(value)
	res.Value = c.copy(value)
	res.Error = nil

	// Set by the store below, run
	// only once the lock is released.
	var evict func()

	c.cache.Lock()
	defer func() {
		c.cache.Unlock()

		// Hooks run outside of the lock.
		if evict != nil {
			evict()
		}
		c.invalid(value)
	}()

	evict = c.store(res)
	return nil
}
// Invalidate will invalidate any result(s) in the cache found under the
// given lookup and key parts. Panics if the lookup name is unknown or the
// number of key parts does not match the lookup.
func (c *Cache[T]) Invalidate(lookup string, keyParts ...any) {
	sk := c.lookups.get(lookup)
	cacheKey := sk.genKey(keyParts)
	c.invalidate(sk, cacheKey)
}
// Clear empties the cache, calling the invalidate callback where necessary.
// It is implemented as a Trim of 100.
//
// NOTE(review): Trim is documented in this file as keeping the cache "within
// percentage of total capacity"; read that way a Trim(100) would remove
// nothing. Confirm the meaning of the percentage against simple.Cache.Trim.
func (c *Cache[T]) Clear() {
	c.Trim(100)
}
// Trim ensures the cache stays within percentage of total capacity,
// truncating where necessary. The call is passed straight through to
// the underlying cache.
func (c *Cache[T]) Trim(perc float64) {
	c.cache.Trim(perc)
}
// load returns a copy of the value cached under key for the given (unique)
// lookup, or the cached error. When nothing is cached it calls the load
// function, caches the outcome (unless the error is one c.ignore accepts)
// and returns it. Panics if the lookup is not unique.
func (c *Cache[T]) load(lookup *structKey, key string, load func() (T, error)) (T, error) {
	if !lookup.unique {
		// Load can only ever hand back a single result.
		panic("non-unique lookup does not support load: " + lookup.name)
	}

	var (
		zero   T
		cached *result
	)

	// Under lock, resolve key -> primary key -> result.
	// Only a key mapping to exactly one primary key counts.
	c.cache.Lock()
	pkeys := lookup.pkeys[key]
	if len(pkeys) == 1 {
		// The invalidation / eviction hooks take the lock
		// separately and only then update pkeys, so a primary
		// key may have no matching entry; hence the ok check.
		if entry, ok := c.cache.Cache.Get(pkeys[0]); ok {
			cached = entry.Value.(*result)
		}
	}
	c.cache.Unlock()

	if cached == nil {
		// Cache miss, generate a fresh result.
		value, err := load()

		switch {
		case err == nil:
			// Successful load: store the value
			// under every key it generates.
			cached = getResult()
			cached.Value = value
			cached.Keys = c.lookups.generate(cached.Value)

		case c.ignore(err):
			// Not a cacheable error type.
			return zero, err

		default:
			// Cacheable error: store it, but
			// only under the requested key.
			cached = getResult()
			cached.Error = err
			cached.Keys = []cacheKey{{
				info: lookup,
				key:  key,
			}}
		}

		// Set by the store below, run
		// only once the lock is released.
		var evict func()

		// The lock is held until this function returns, so
		// the reads of the result below happen under lock.
		c.cache.Lock()
		defer func() {
			c.cache.Unlock()
			if evict != nil {
				evict()
			}
		}()

		evict = c.store(cached)
	}

	// Hand back a cached error as-is.
	if cached.Error != nil {
		return zero, cached.Error
	}

	// Never hand out the cached value itself.
	return c.copy(cached.Value.(T)), nil
}
// has reports whether a non-error result is cached under key for the given
// lookup. Only a key mapping to exactly one primary key is considered.
func (c *Cache[T]) has(lookup *structKey, key string) bool {
	found := false

	c.cache.Lock()
	pkeys := lookup.pkeys[key]
	if len(pkeys) == 1 {
		// The invalidation / eviction hooks take the lock
		// separately and only then update pkeys, so a primary
		// key may have no matching entry; hence the ok check.
		if entry, ok := c.cache.Cache.Get(pkeys[0]); ok {
			res := entry.Value.(*result)

			// Only a positive (non-error) result counts.
			found = res != nil && res.Error == nil
		}
	}
	c.cache.Unlock()

	return found
}
// store inserts res into the underlying cache under a newly assigned primary
// key, and registers that primary key under each of res.Keys. Both callers in
// this file acquire the cache lock before calling it.
//
// For unique lookups, any existing result already registered under the same
// key has that key dropped; a conflicting result left with no keys at all is
// deleted from the cache and queued for eviction.
//
// The returned function (nil when there is nothing to do) passes every
// displaced result to the cache's evict hook. Both callers run it only after
// releasing the lock, as the hook installed by this package takes the lock
// itself. Panics if the primary key counter overflows.
func (c *Cache[T]) store(res *result) (evict func()) {
	var toEvict []*result

	// Assign the next primary key, guarding
	// against wrap-around of the counter.
	res.PKey = c.next
	c.next++
	if res.PKey > c.next {
		panic("cache primary key overflow")
	}

	for _, key := range res.Keys {
		// Look for cache primary keys.
		pkeys := key.info.pkeys[key.key]

		if key.info.unique && len(pkeys) > 0 {
			for _, conflict := range pkeys {
				// Get the overlapping result with this key.
				entry, ok := c.cache.Cache.Get(conflict)
				if !ok {
					// Since the invalidation / eviction hooks acquire a mutex
					// lock separately, and only at this point are the pkeys
					// updated, there is a chance that a primary key may return
					// no matching entry. Hence we have to check for it here.
					continue
				}

				// From conflicting entry, drop this key, this
				// will prevent eviction cleanup key confusion.
				confRes := entry.Value.(*result)
				confRes.Keys.drop(key.info.name)

				// Check the CONFLICTING result's remaining keys.
				// (res.Keys is never empty here, we are ranging it.)
				if len(confRes.Keys) == 0 {
					// We just over-wrote the only lookup key for
					// this value, so we drop its primary key too.
					_ = c.cache.Cache.Delete(conflict)

					// Add finished result to evict queue.
					toEvict = append(toEvict, confRes)
				}
			}

			// Drop existing.
			pkeys = pkeys[:0]
		}

		// Store primary key lookup.
		pkeys = append(pkeys, res.PKey)
		key.info.pkeys[key.key] = pkeys
	}

	// Acquire new cache entry.
	entry := simple.GetEntry()
	entry.Key = res.PKey
	entry.Value = res

	evictFn := func(_ int64, entry *simple.Entry) {
		// on evict during set, store evicted result.
		toEvict = append(toEvict, entry.Value.(*result))
	}

	// Store main entry under primary key, catch evicted.
	c.cache.Cache.SetWithHook(res.PKey, entry, evictFn)

	if len(toEvict) == 0 {
		// none evicted.
		return nil
	}

	return func() {
		for _, evicted := range toEvict {
			// Call evict hook on each entry.
			c.cache.Evict(evicted.PKey, evicted)
		}
	}
}
// invalidate removes the key from the given lookup's key map, then
// invalidates every primary key that was registered under it.
func (c *Cache[T]) invalidate(lookup *structKey, key string) {
	var stale []int64

	// Detach the primary keys for this key under lock.
	c.cache.Lock()
	if pkeys, ok := lookup.pkeys[key]; ok {
		stale = pkeys
		delete(lookup.pkeys, key)
	}
	c.cache.Unlock()

	// Invalidate outside the lock: the invalidate
	// hook installed by this package takes it itself.
	c.cache.InvalidateAll(stale...)
}
// result is a single cached outcome: either a value or the error returned
// when loading it, together with the keys it is reachable under.
type result struct {
// PKey is the primary key this result is
// stored under in the underlying cache.
PKey int64
// Keys are the lookup keys this
// result is accessible under.
Keys cacheKeys
// Value is the cached value (stored as T);
// only meaningful when Error is nil.
Value any
// Error is the cached load error, nil
// for a positive (value) result.
Error error
}

View File

@@ -1,282 +0,0 @@
package result
import (
"fmt"
"reflect"
"strings"
"sync"
"unicode"
"unicode/utf8"
"codeberg.org/gruf/go-byteutil"
"codeberg.org/gruf/go-mangler"
)
// structKeys provides convenience methods for a list
// of structKey field combinations used for cache keys.
type structKeys []structKey
// get returns a pointer to the structKey registered under the given
// lookup name. Panics if no lookup with that name exists.
func (sk structKeys) get(name string) *structKey {
	for i := 0; i < len(sk); i++ {
		if candidate := &sk[i]; candidate.name == name {
			return candidate
		}
	}
	panic("unknown lookup: \"" + name + "\"")
}
// generate will calculate and produce a slice of the cache keys the given
// value can be stored under, as determined by the receiving struct keys.
// A lookup is skipped entirely when any of its fields mangles to a
// disallowed zero value. Panics if given a nil pointer.
func (sk structKeys) generate(a any) []cacheKey {
	var keys []cacheKey

	// Reflect the value, unwrapping any pointer
	// levels, to get at the struct fields.
	v := reflect.ValueOf(a)
	for v.Kind() == reflect.Pointer {
		if v.IsNil() {
			panic("nil ptr")
		}
		v = v.Elem()
	}

	// One pooled buffer is reused for every key.
	buf := getBuf()

	for i := range sk {
		info := &sk[i]
		buf.Reset()

		// Mangle each field value into the buffer,
		// giving up on the first disallowed zero value.
		usable := true
		for j := range info.fields {
			field := &info.fields[j]
			part := v.Field(field.index).Interface()
			if !field.manglePart(buf, part) {
				usable = false
				break
			}

			// Separate the key parts.
			buf.B = append(buf.B, '.')
		}
		if !usable {
			continue
		}

		// Drop the trailing '.'
		buf.Truncate(1)

		keys = append(keys, cacheKey{
			info: info,
			key:  string(buf.B), // copy
		})
	}

	putBuf(buf)
	return keys
}
// cacheKeys is the list of cache keys a single result is stored under.
type cacheKeys []cacheKey
// drop removes the first cacheKey with the given lookup name from the
// receiving cacheKeys slice, in place. It does nothing if none matches.
func (ck *cacheKeys) drop(name string) {
	keys := *ck
	for idx := range keys {
		if keys[idx].info.name != name {
			continue
		}

		// Shift the tail down over the dropped key.
		*ck = append(keys[:idx], keys[idx+1:]...)
		return
	}
}
// cacheKey represents an actual cached key: a key
// string together with the lookup it belongs to.
type cacheKey struct {
// info is a reference to the structKey this
// cacheKey is representing. This is a shared
// reference and as such only the structKey.pkeys
// lookup map is expecting to be modified.
info *structKey
// key is the actual string representing
// this cache key for hashmap lookups.
key string
}
// structKey represents a list of struct fields
// encompassing a single cache key, the string name
// of the lookup, whether the lookup is unique, and
// the lookup map from key strings to primary cache keys.
type structKey struct {
// name is the provided cache lookup name for
// this particular struct key, consisting of
// period ('.') separated struct field names.
name string
// unique determines whether this structKey supports
// multiple or just the singular unique result.
unique bool
// fields is a slice of runtime struct field
// information (index, mangler, disallowed zero
// value) for the fields encompassed by this key.
fields []structField
// pkeys is a lookup of stored struct key values
// to the primary cache lookup key (int64). this
// is protected by the main cache mutex.
pkeys map[string][]int64
}
// newStructKey will generate a structKey{} information object for the
// user-given lookup and the struct type of the cache's generic parameter.
// Panics if a named field does not exist on the type or is not exported.
//
// NOTE(review): only the first element of the reflected field index is
// recorded, so a field promoted from an embedded struct would presumably
// resolve to the embedding field instead — confirm this is never needed.
func newStructKey(lk Lookup, t reflect.Type) structKey {
	// The lookup name is a dot-separated
	// list of the struct field names.
	fieldNames := strings.Split(lk.Name, ".")

	sk := structKey{
		name:   lk.Name,
		unique: !lk.Multi,
		fields: make([]structField, len(fieldNames)),
		pkeys:  make(map[string][]int64),
	}

	for i, fieldName := range fieldNames {
		sf, found := t.FieldByName(fieldName)
		if !found {
			panic("no field found for name: \"" + fieldName + "\"")
		}
		if !isExported(fieldName) {
			panic("field must be exported")
		}

		// Record where the field lives in the
		// struct and how to serialize its value.
		field := &sk.fields[i]
		field.index = sf.Index[0]
		field.mangle = mangler.Get(sf.Type)

		if lk.AllowZero {
			// Zero values are acceptable keys,
			// leave the zero marker empty.
			continue
		}

		// Remember what this field's zero value mangles
		// to, so that it can be rejected as a key part.
		zeroValue := reflect.New(sf.Type).Elem().Interface()
		field.zero = string(field.mangle(nil, zeroValue))
	}

	return sk
}
// genKey generates a cache key string for the given key parts, serializing
// each part with the mangler of the corresponding struct field and joining
// them with '.'. Zero value parts are NOT rejected here. Panics if the
// number of parts differs from the number of fields in this lookup.
func (sk *structKey) genKey(parts []any) string {
	// Check this expected no. key parts: we WANT one
	// part per field, and RECEIVED len(parts) of them.
	if len(parts) != len(sk.fields) {
		panic(fmt.Sprintf("incorrect no. key parts provided: want=%d received=%d", len(sk.fields), len(parts)))
	}

	// Acquire buffer
	buf := getBuf()
	buf.Reset()

	for i, part := range parts {
		// Mangle this key part into buffer.
		// specifically ignoring whether this
		// is returning a zero value key part.
		_ = sk.fields[i].manglePart(buf, part)

		// Append part separator.
		buf.B = append(buf.B, '.')
	}

	// Drop last '.'
	buf.Truncate(1)

	// Create str copy
	str := string(buf.B)

	// Release buf
	putBuf(buf)

	return str
}
// structField holds what is needed to turn one
// struct field of a lookup into a cache key part.
type structField struct {
// index is the reflect index of this struct field.
index int
// zero is the possible zero value for this
// key part. if set, this will _always_ be
// non-empty due to how the mangler works.
//
// i.e. zero = "" --> allow zero value keys
// zero != "" --> don't allow zero value keys
zero string
// mangle is the mangler function for
// serializing values of this struct field.
mangle mangler.Mangler
}
// manglePart appends the mangled form of part to buf, and reports whether
// the part is usable as a key part: true when zero values are allowed for
// this field, or when the mangled bytes differ from the field's zero value.
// The bytes are appended to buf in either case.
func (field *structField) manglePart(buf *byteutil.Buffer, part any) bool {
	// Remember where this part begins.
	start := len(buf.B)

	// Serialize the part onto the end of the buffer.
	buf.B = field.mangle(buf.B, part)

	if field.zero == "" {
		// Zero value keys are allowed.
		return true
	}

	// Usable only if this is NOT the zero value.
	return string(buf.B[start:]) != field.zero
}
// isExported reports whether the given name is exported,
// i.e. whether its first rune is an upper case letter.
func isExported(fnName string) bool {
	if fnName == "" {
		// No first rune to inspect.
		return false
	}
	first, _ := utf8.DecodeRuneInString(fnName)
	return unicode.IsUpper(first)
}
// bufPool provides a memory pool of byte buffers for
// use when encoding key types. New buffers start out
// empty, with 512 bytes of capacity.
var bufPool = sync.Pool{
	New: func() any {
		const initialCap = 512
		return &byteutil.Buffer{B: make([]byte, 0, initialCap)}
	},
}
// getBuf acquires a byte buffer from the memory pool. The
// buffer is NOT reset here; callers reset it before use.
func getBuf() *byteutil.Buffer {
	pooled := bufPool.Get()
	return pooled.(*byteutil.Buffer)
}
// putBuf places a byte buffer back in the memory pool, unless its
// capacity exceeds the maximum uint16 value, in which case it is
// dropped so that oversized buffers are not kept in the pool.
func putBuf(buf *byteutil.Buffer) {
	const maxPooledCap = int(^uint16(0))
	if buf.Cap() <= maxPooledCap {
		bufPool.Put(buf)
	}
}

View File

@@ -1,25 +0,0 @@
package result
import "sync"
// resultPool is a global pool for result
// objects, regardless of cache type. It has
// no New function set, so Get may return nil.
var resultPool sync.Pool
// getResult fetches a result from the pool, or
// allocates a new one when the pool has none.
func getResult() *result {
	if pooled := resultPool.Get(); pooled != nil {
		return pooled.(*result)
	}
	return new(result)
}
// putResult resets every field of the result to its zero
// value (dropping its references to keys, value and error),
// then places it back in the pool.
func putResult(r *result) {
	*r = result{}
	resultPool.Put(r)
}