[performance] update storage backend and make use of seek syscall when available (#2924)

* update to use go-storage/ instead of go-store/v2/storage/

* pull in latest version from codeberg

* remove test output 😇

* add code comments

* set the exclusive bit when creating new files in disk config

* bump to actual release version

* bump to v0.1.1 (tis a simple no-logic change)

* update readme

* only use a temporary read seeker when decoding video if required (should only be S3 now)

* use fastcopy library to use memory pooled buffers when calling TempFileSeeker()

* update to use seek call in serveFileRange()
This commit is contained in:
kim
2024-05-22 09:46:24 +00:00
committed by GitHub
parent 06b1e0173b
commit 3d3e99ae52
91 changed files with 1610 additions and 12737 deletions

467
vendor/codeberg.org/gruf/go-storage/disk/disk.go generated vendored Normal file
View File

@@ -0,0 +1,467 @@
package disk
import (
"bytes"
"context"
"errors"
"io"
"io/fs"
"os"
"path"
"strings"
"syscall"
"codeberg.org/gruf/go-fastcopy"
"codeberg.org/gruf/go-fastpath/v2"
"codeberg.org/gruf/go-storage"
"codeberg.org/gruf/go-storage/internal"
)
// Compile-time assertion that *DiskStorage satisfies the
// storage.Storage interface; the build fails here if the
// method set ever drifts from the interface definition.
var _ storage.Storage = (*DiskStorage)(nil)
// DefaultConfig hands back the package's default DiskStorage
// configuration. Config is returned by value, so the caller is
// free to modify the result without touching the package default.
func DefaultConfig() Config {
	cfg := defaultConfig
	return cfg
}
// defaultConfig is the immutable default configuration,
// handed out (by value) from DefaultConfig() and used by
// getDiskConfig() to fill in any unset fields.
var defaultConfig = Config{
	// Files are opened read-only for read operations.
	OpenRead: OpenArgs{syscall.O_RDONLY, 0o644},

	// Files are created if missing and opened write-only.
	// O_TRUNC ensures that overwriting an existing, longer
	// file does not leave stale trailing bytes behind the
	// newly written data. It has no effect when a caller
	// additionally sets O_EXCL, since the open then fails
	// for any file that already exists.
	OpenWrite: OpenArgs{syscall.O_CREAT | syscall.O_WRONLY | syscall.O_TRUNC, 0o644},

	// Permissions for any created directories.
	MkdirPerms: 0o755,

	// Buffer size used when copying streams to disk.
	WriteBufSize: 4096,
}
// OpenArgs defines the arguments passed
// in a syscall.Open() operation.
type OpenArgs struct {
	// Flags is passed as the open-mode
	// argument, e.g. syscall.O_RDONLY.
	Flags int
	// Perms is passed as the permission
	// bits argument to syscall.Open().
	Perms uint32
}
// Config defines options to be
// used when opening a DiskStorage.
//
// Zero-valued fields are replaced by the
// matching defaultConfig value when the
// config is processed by getDiskConfig().
type Config struct {
	// OpenRead are the arguments passed
	// to syscall.Open() when opening a
	// file for read operations.
	OpenRead OpenArgs
	// OpenWrite are the arguments passed
	// to syscall.Open() when opening a
	// file for write operations.
	OpenWrite OpenArgs
	// MkdirPerms are the permissions used
	// when creating necessary sub-dirs in
	// a storage key with slashes.
	MkdirPerms uint32
	// WriteBufSize is the buffer size
	// to use when writing file streams.
	// Values <= 0 select the default.
	WriteBufSize int
}
// getDiskConfig returns a valid (and owned!) Config for the given ptr.
//
// A nil cfg yields the package defaults. Otherwise every zero-valued
// field (and any non-positive WriteBufSize) is replaced by its default.
// The caller's Config is never modified: all defaulting is applied to
// a local copy, which is what gets returned.
func getDiskConfig(cfg *Config) Config {
	if cfg == nil {
		// use defaults.
		return defaultConfig
	}

	// Take our own copy so that filling in
	// defaults does not write back through
	// the pointer into the caller's struct.
	out := *cfg

	// Ensure non-zero syscall args.
	if out.OpenRead.Flags == 0 {
		out.OpenRead.Flags = defaultConfig.OpenRead.Flags
	}
	if out.OpenRead.Perms == 0 {
		out.OpenRead.Perms = defaultConfig.OpenRead.Perms
	}
	if out.OpenWrite.Flags == 0 {
		out.OpenWrite.Flags = defaultConfig.OpenWrite.Flags
	}
	if out.OpenWrite.Perms == 0 {
		out.OpenWrite.Perms = defaultConfig.OpenWrite.Perms
	}
	if out.MkdirPerms == 0 {
		out.MkdirPerms = defaultConfig.MkdirPerms
	}

	// Ensure valid write buf.
	if out.WriteBufSize <= 0 {
		out.WriteBufSize = defaultConfig.WriteBufSize
	}

	return out
}
// DiskStorage is a Storage implementation
// that stores directly to a filesystem.
// Instances are created via Open().
type DiskStorage struct {
	// path is the root path of this store; Open()
	// stores it cleaned and with a trailing '/'.
	path string
	// pool is the prepared io copier with buffer pool,
	// used for copying write streams to opened files.
	pool fastcopy.CopyPool
	// cfg is the supplied configuration for this
	// store, with defaults already filled in.
	cfg Config
}
// Open prepares a DiskStorage rooted at the given folder path, using
// the supplied configuration (nil selects defaults). The root directory,
// and any missing parents, are created with the configured MkdirPerms.
func Open(path string, cfg *Config) (*DiskStorage, error) {
	// Resolve configuration, filling defaults.
	config := getDiskConfig(cfg)

	// Normalize the root path and append a
	// trailing '/' so that later prefix checks
	// and key trimming operate on whole segments.
	pb := internal.GetPathBuilder()
	path = pb.Clean(path) + "/"
	internal.PutPathBuilder(pb)

	// Create the root directory if missing.
	if err := os.MkdirAll(path, fs.FileMode(config.MkdirPerms)); err != nil {
		return nil, err
	}

	// Assemble the storage instance, sizing the
	// copy pool's buffers from the configuration.
	st := new(DiskStorage)
	st.path = path
	st.cfg = config
	st.pool.Buffer(config.WriteBufSize)

	return st, nil
}
// Clean: implements Storage.Clean().
//
// Walks the storage root removing empty sub-directories.
// Returns the context's error, without touching the
// filesystem, if ctx is already cancelled / expired.
func (st *DiskStorage) Clean(ctx context.Context) error {
	err := ctx.Err()
	if err == nil {
		// Directories only need opening read-only.
		args := OpenArgs{Flags: syscall.O_RDONLY}
		err = cleanDirs(st.path, args)
	}
	return err
}
// ReadBytes: implements Storage.ReadBytes().
//
// Opens a stream for key via ReadStream() and reads it fully into
// memory. The stream is always closed; a read error takes precedence
// over a close error, and either results in a nil slice.
func (st *DiskStorage) ReadBytes(ctx context.Context, key string) ([]byte, error) {
	// Open stream for key.
	rc, err := st.ReadStream(ctx, key)
	if err != nil {
		return nil, err
	}

	// Drain the stream, then close it
	// regardless of the read outcome.
	data, readErr := io.ReadAll(rc)
	closeErr := rc.Close()

	switch {
	case readErr != nil:
		return nil, readErr
	case closeErr != nil:
		return nil, closeErr
	}

	return data, nil
}
// ReadStream: implements Storage.ReadStream().
//
// Opens the file for key using the configured OpenRead arguments.
// A missing file is reported as storage.ErrNotFound wrapped with the
// key; any other open error is returned as-is. The caller is
// responsible for closing the returned stream.
func (st *DiskStorage) ReadStream(ctx context.Context, key string) (io.ReadCloser, error) {
	// Resolve key to an on-disk path.
	kpath, err := st.Filepath(key)
	if err != nil {
		return nil, err
	}

	// Bail early on a dead context.
	if err := ctx.Err(); err != nil {
		return nil, err
	}

	// Open with the read arguments.
	file, err := open(kpath, st.cfg.OpenRead)

	switch {
	case err == nil:
		return file, nil

	case err == syscall.ENOENT:
		// Translate not-found errors and wrap with key.
		return nil, internal.ErrWithKey(storage.ErrNotFound, key)

	default:
		return nil, err
	}
}
// WriteBytes: implements Storage.WriteBytes().
//
// Thin wrapper that feeds value through WriteStream() and
// narrows the reported written byte count down to an int.
func (st *DiskStorage) WriteBytes(ctx context.Context, key string, value []byte) (int, error) {
	src := bytes.NewReader(value)
	written, err := st.WriteStream(ctx, key, src)
	return int(written), err
}
// WriteStream: implements Storage.WriteStream().
//
// Resolves key to a file path below the storage root, creates any
// needed parent directories, opens the file with the configured
// OpenWrite arguments and copies stream into it. Returns the number
// of bytes copied. If OpenWrite.Flags contains O_EXCL and the file
// already exists, storage.ErrAlreadyExists (wrapped with the key)
// is returned.
func (st *DiskStorage) WriteStream(ctx context.Context, key string, stream io.Reader) (int64, error) {
	// Acquire path builder buffer.
	pb := internal.GetPathBuilder()

	// Generate the file path for given key.
	kpath, subdir, err := st.filepath(pb, key)

	// Done with path buffer. The returned path is
	// a copy of the builder's bytes, so the builder
	// can be released right away, including when
	// the key turned out to be invalid.
	internal.PutPathBuilder(pb)

	if err != nil {
		return 0, err
	}

	// Check context still valid.
	if err := ctx.Err(); err != nil {
		return 0, err
	}

	if subdir {
		// Get dir of key path.
		dir := path.Dir(kpath)

		// Note that subdir will only be set if
		// the transformed key (without base path)
		// contains any slashes. This is not a
		// definitive check, but it allows us to
		// skip a syscall if mkdirall not needed!
		perms := fs.FileMode(st.cfg.MkdirPerms)
		err = os.MkdirAll(dir, perms)
		if err != nil {
			return 0, err
		}
	}

	// Attempt to open file with write args.
	file, err := open(kpath, st.cfg.OpenWrite)
	if err != nil {
		if st.cfg.OpenWrite.Flags&syscall.O_EXCL != 0 &&
			err == syscall.EEXIST {
			// Translate already exists errors and wrap with key.
			err = internal.ErrWithKey(storage.ErrAlreadyExists, key)
		}
		return 0, err
	}

	// Copy provided stream to file interface.
	n, err := st.pool.Copy(file, stream)
	if err != nil {
		// The copy error is the one worth
		// reporting; close is best-effort.
		_ = file.Close()
		return n, err
	}

	// Finally, close file.
	return n, file.Close()
}
// Stat implements Storage.Stat().
//
// Returns an entry holding the key and on-disk size of the file
// stored under key. When no file exists at the key's path, both
// the returned entry and error are nil.
func (st *DiskStorage) Stat(ctx context.Context, key string) (*storage.Entry, error) {
	// Resolve key to an on-disk path.
	kpath, err := st.Filepath(key)
	if err != nil {
		return nil, err
	}

	// Bail early on a dead context.
	if err := ctx.Err(); err != nil {
		return nil, err
	}

	// Query file details. A nil result covers both
	// the failure case (err set) and the not-found
	// case (err nil), so err is passed through as-is.
	info, err := stat(kpath)
	if info == nil {
		return nil, err
	}

	entry := storage.Entry{
		Key:  key,
		Size: info.Size,
	}
	return &entry, nil
}
// Remove implements Storage.Remove().
//
// Deletes the file stored under key. Returns storage.ErrNotFound
// (wrapped with the key) when no file exists, and an error when the
// path refers to anything other than a regular file (e.g. a directory).
func (st *DiskStorage) Remove(ctx context.Context, key string) error {
	// Generate file path for key.
	kpath, err := st.Filepath(key)
	if err != nil {
		return err
	}

	// Check context still valid.
	if err := ctx.Err(); err != nil {
		return err
	}

	// Stat file on disk.
	stat, err := stat(kpath)
	if err != nil {
		return err
	}

	// Not-found (or handled
	// as) error situations.
	if stat == nil {
		return internal.ErrWithKey(storage.ErrNotFound, key)
	} else if stat.Mode&syscall.S_IFMT != syscall.S_IFREG {
		// The file type must be extracted with the S_IFMT mask and
		// compared for equality: S_IFREG shares its bit with other
		// types (e.g. S_IFSOCK), so a plain bit test against S_IFREG
		// would wrongly accept those as regular files.
		err := errors.New("storage/disk: not a regular file")
		return internal.ErrWithKey(err, key)
	}

	// Remove at path (we know this is file).
	if err := unlink(kpath); err != nil {
		if err == syscall.ENOENT {
			// Translate not-found errors and wrap with key.
			// (the file may have vanished since the stat).
			err = internal.ErrWithKey(storage.ErrNotFound, key)
		}
		return err
	}

	return nil
}
// WalkKeys implements Storage.WalkKeys().
//
// Walks the directory tree below the storage root, calling opts.Step
// for every regular file whose key passes the optional opts.Prefix and
// opts.Filter checks. Panics if opts.Step is nil. The first error
// returned from reading a directory or from opts.Step aborts the walk.
func (st *DiskStorage) WalkKeys(ctx context.Context, opts storage.WalkKeysOpts) error {
	if opts.Step == nil {
		panic("nil step fn")
	}

	// Bail early on a dead context.
	if err := ctx.Err(); err != nil {
		return err
	}

	// Path builder shared across the whole walk.
	pb := internal.GetPathBuilder()
	defer internal.PutPathBuilder(pb)

	// Directory at which the walk begins.
	root := st.path

	if opts.Prefix != "" {
		// Translate the key prefix into a storage file path.
		prefixPath, subdir, err := st.filepath(pb, opts.Prefix)
		if err != nil {
			return internal.ErrWithMsg(err, "prefix error")
		}

		if subdir {
			// subdir is only a hint (the key contained
			// a slash), but when set we can start the
			// walk at the prefix's parent directory
			// rather than at the storage root.
			root = path.Dir(prefixPath)
		}

		// From here on the prefix is
		// compared against full paths.
		opts.Prefix = prefixPath
	}

	// visit handles a single directory entry found within dir.
	visit := func(dir string, entry fs.DirEntry) error {
		// Only regular files are keys.
		if !entry.Type().IsRegular() {
			return nil
		}

		// Full path of this entry.
		full := pb.Join(dir, entry.Name())

		// Cheap prefix filter on the full path (if set).
		if opts.Prefix != "" && !strings.HasPrefix(full, opts.Prefix) {
			return nil
		}

		// Strip the storage root to get the key.
		key := full[len(st.path):]

		// Apply caller-provided filter.
		if opts.Filter != nil && !opts.Filter(key) {
			return nil
		}

		// Fetch file info for the entry's size.
		info, err := entry.Info()
		if err != nil {
			return err
		}

		// Hand the entry to the caller.
		return opts.Step(storage.Entry{
			Key:  key,
			Size: info.Size(),
		})
	}

	// Directories only need opening read-only.
	return walkDir(pb, root, OpenArgs{Flags: syscall.O_RDONLY}, visit)
}
// Filepath validates key and returns the on-disk file path it maps to
// within this storage. An error is returned for keys that would resolve
// to a location outside of (or equal to) the storage root.
func (st *DiskStorage) Filepath(key string) (path string, err error) {
	// Borrow a pooled builder for the duration of the call.
	pb := internal.GetPathBuilder()
	defer internal.PutPathBuilder(pb)

	path, _, err = st.filepath(pb, key)
	return path, err
}
// filepath does the real work behind Filepath(): it builds the storage
// path for key using the supplied builder, and additionally reports
// whether that path *may* sit in a sub-directory of the storage root.
// The path is returned even when the key is rejected as invalid.
func (st *DiskStorage) filepath(pb *fastpath.Builder, key string) (path string, subdir bool, err error) {
	// Root first, then key.
	pb.Append(st.path)
	pb.Append(key)

	// Copy out of the builder's buffer, so the
	// result stays valid after the builder is reused.
	path = string(pb.B)

	// A slash in the key hints at a sub-directory.
	// This is a fast heuristic, not a definitive check.
	subdir = strings.IndexByte(key, '/') >= 0

	// Reject keys escaping the storage root.
	if isDirTraversal(st.path, path) {
		err = internal.ErrWithKey(storage.ErrInvalidKey, key)
	}

	return path, subdir, err
}
// isDirTraversal reports whether rootPlusPath escapes root. Both
// arguments are expected to be cleaned, with rootPlusPath being the
// result of joining root with some other path. A rootPlusPath that
// is merely equal in length to root (i.e. nothing was added below
// the root) is treated as a traversal too.
func isDirTraversal(root, rootPlusPath string) bool {
	// Root is $PWD: the only way out is upwards.
	if root == "." {
		return strings.HasPrefix(rootPlusPath, "../")
	}

	// Anything not starting with root has left it.
	if !strings.HasPrefix(rootPlusPath, root) {
		return true
	}

	// Prefixed by root: must extend beyond it.
	return len(rootPlusPath) == len(root)
}

206
vendor/codeberg.org/gruf/go-storage/disk/fs.go generated vendored Normal file
View File

@@ -0,0 +1,206 @@
package disk
import (
"errors"
"fmt"
"io/fs"
"os"
"syscall"
"codeberg.org/gruf/go-fastpath/v2"
"codeberg.org/gruf/go-storage/internal"
)
// NOTE:
// These functions are for opening storage files,
// not necessarily for e.g. initial setup (OpenFile)
// walkDir iteratively (depth-first) traverses the directory tree rooted
// at path, invoking walkFn with the containing directory path and the
// entry for every entry encountered, directories included. Directories
// are opened using args. The first error from reading a directory or
// from walkFn stops the walk and is returned.
func walkDir(pb *fastpath.Builder, path string, args OpenArgs, walkFn func(string, fs.DirEntry) error) error {
	// Start with the entries of the root.
	entries, err := readDir(path, args)
	if err != nil {
		return err
	}

	// level remembers a partially processed directory:
	// its path and the entries still left to visit once
	// the sub-directory we descended into is finished.
	type level struct {
		dir  string
		rest []fs.DirEntry
	}

	// pending holds the suspended parent levels.
	var pending []level

	for {
		if len(entries) == 0 {
			top := len(pending) - 1
			if top < 0 {
				// Nothing left anywhere.
				return nil
			}

			// Resume the most recently suspended level.
			path, entries = pending[top].dir, pending[top].rest
			pending = pending[:top]
		}

		for len(entries) > 0 {
			// Take the next entry off the front.
			entry := entries[0]
			entries = entries[1:]

			// Report it to the walk function.
			if err := walkFn(path, entry); err != nil {
				return err
			}

			if !entry.IsDir() {
				continue
			}

			// Suspend this level and descend into the
			// directory, whose entries become current.
			pending = append(pending, level{dir: path, rest: entries})
			path = pb.Join(path, entry.Name())
			if entries, err = readDir(path, args); err != nil {
				return err
			}

			// Restart from the top of the outer loop so that
			// an empty directory immediately resumes a parent.
			break
		}
	}
}
// cleanDirs walks the directory tree below path and removes
// sub-directories found to contain no entries. The directory
// at path itself is never removed.
func cleanDirs(path string, args OpenArgs) error {
	// Borrow a pooled builder for the whole traversal.
	pb := internal.GetPathBuilder()
	defer internal.PutPathBuilder(pb)

	return cleanDir(pb, path, args, true)
}
// cleanDir implements the recursive part of cleanDirs(). It reads the
// directory at path: if it has no entries (and is not the top-level
// directory) it is removed, otherwise each contained sub-directory is
// cleaned in turn. Errors from sub-directories are collected and
// returned joined together rather than aborting the traversal.
func cleanDir(pb *fastpath.Builder, path string, args OpenArgs, top bool) error {
	// Read what this directory holds.
	entries, err := readDir(path, args)
	if err != nil {
		return err
	}

	// Empty (and not the root): remove it.
	if len(entries) == 0 && !top {
		return rmdir(path)
	}

	var errs []error

	for i := range entries {
		// Only directories need descending into.
		if !entries[i].IsDir() {
			continue
		}

		// Path of the sub-directory.
		sub := pb.Join(path, entries[i].Name())

		// Clean it recursively, noting any failure.
		err := cleanDir(pb, sub, args, false)
		if err == nil {
			continue
		}
		errs = append(errs, fmt.Errorf("error(s) cleaning subdir %s: %w", sub, err))
	}

	// Nil when nothing went wrong.
	return errors.Join(errs...)
}
// readDir opens the directory at path using args, reads all of its
// entries (in directory order, unsorted) and closes it again. Entries
// read before a failure are returned together with the error.
func readDir(path string, args OpenArgs) ([]fs.DirEntry, error) {
	dir, err := open(path, args)
	if err != nil {
		return nil, err
	}

	// Read-only handle: a close failure
	// has no bearing on the entries read.
	defer func() { _ = dir.Close() }()

	// n <= 0 reads every remaining entry.
	return dir.ReadDir(-1)
}
// open is a simple wrapper around syscall.Open(), retrying on EINTR
// and wrapping the resulting file descriptor in an *os.File. Errors
// are returned as the raw syscall errno (not an *os.PathError), so
// callers may compare them directly, e.g. err == syscall.ENOENT.
func open(path string, args OpenArgs) (*os.File, error) {
	// Always set close-on-exec. A raw syscall.Open() does not add
	// it by itself (unlike os.OpenFile), and without it every open
	// storage file descriptor would be inherited by any child
	// process the program execs.
	flags := args.Flags | syscall.O_CLOEXEC

	var fd int
	err := retryOnEINTR(func() (err error) {
		fd, err = syscall.Open(path, flags, args.Perms)
		return
	})
	if err != nil {
		return nil, err
	}
	return os.NewFile(uintptr(fd), path), nil
}
// stat wraps syscall.Stat() with EINTR retries. A path that does not
// exist is not treated as an error: in that case both return values
// are nil. Any other failure yields a nil result and the error.
func stat(path string) (*syscall.Stat_t, error) {
	info := new(syscall.Stat_t)

	err := retryOnEINTR(func() error {
		return syscall.Stat(path, info)
	})

	switch err {
	case nil:
		return info, nil

	case syscall.ENOENT:
		// not-found is no error
		return nil, nil

	default:
		return nil, err
	}
}
// unlink removes the file at path via syscall.Unlink(),
// retrying for as long as the call is interrupted (EINTR).
func unlink(path string) error {
	remove := func() error {
		return syscall.Unlink(path)
	}
	return retryOnEINTR(remove)
}
// rmdir removes the directory at path via syscall.Rmdir(),
// retrying for as long as the call is interrupted (EINTR).
func rmdir(path string) error {
	remove := func() error {
		return syscall.Rmdir(path)
	}
	return retryOnEINTR(remove)
}
// retryOnEINTR is a low-level filesystem helper that calls do
// repeatedly for as long as it returns syscall.EINTR, and
// returns the first result (nil or error) that is anything else.
func retryOnEINTR(do func() error) error {
	err := do()
	for err == syscall.EINTR {
		// Interrupted: go again.
		err = do()
	}
	return err
}