Files
GoToSocial/vendor/codeberg.org/gruf/go-split/splitter.go
kim 6acf56cde9 [feature] support nested configuration files, and setting ALL configuration variables by CLI and env (#4109)
This updates our configuration code generator to also generate map marshalers and unmarshalers, so we now have much more control over how things get read from pflags, and stored in / read from viper configuration. This allows us to set ALL configuration variables by CLI and environment, AND to support nested configuration files, e.g.

```yaml
advanced:
    scraper-deterrence: true

http-client:
    allow-ips: ["127.0.0.1"]
```

is the same as

```yaml
advanced-scraper-deterrence: true

http-client-allow-ips: ["127.0.0.1"]
```

This also starts the cleanup of our jumbled Configuration{} type by moving the advanced configuration options into their own nested structs, which also serves to show what this change is capable of. It's worth noting, however, that nesting only works if the Go types are nested too (as this is how we hint to our code generator to generate the necessary flattening code :p).

closes #3195

Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4109
Co-authored-by: kim <grufwub@gmail.com>
Co-committed-by: kim <grufwub@gmail.com>
2025-05-06 15:51:45 +00:00

205 lines
4.2 KiB
Go

package split
import (
"errors"
"strings"
"unicode"
"unicode/utf8"
)
// Splitter carries a reusable byte buffer so that repeated SplitFunc()
// calls can assemble each element without allocating a fresh buffer.
type Splitter struct {
	// B is the scratch buffer that SplitFunc() truncates and refills
	// for every element it extracts.
	B []byte
}
// SplitFunc walks str as a comma-separated list and hands every element it
// finds to fn, in order.
//
// Whitespace in front of each element is skipped. An element may be wrapped
// in single or double quotes, in which case commas inside the quotes belong
// to the element and the closing quote must be followed (after optional
// whitespace) by a comma or the end of the input. Empty segments (a comma
// with nothing before it) are skipped without calling fn.
//
// It returns the first error returned by fn, an error if a quoted element is
// never closed or is not followed by a comma, and nil otherwise.
func (s *Splitter) SplitFunc(str string, fn func(string) error) error {
	for {
		// Every element starts from an empty buffer (capacity is kept).
		s.B = s.B[:0]

		// Skip whitespace ahead of the element.
		str = trimLeadingSpace(str)
		if len(str) == 0 {
			// Nothing left to split.
			return nil
		}

		first := str[0]

		// Empty segment: just step over the comma.
		if first == ',' {
			str = str[1:]
			continue
		}

		// Quoted element, delimited by the same quote character it opened with.
		if first == '\'' || first == '"' {
			// end is relative to str[1:], so the closing quote is str[end+1].
			end := s.next(str[1:], first)
			if end == -1 {
				return errors.New("missing end quote")
			}

			// Hand the unquoted contents to the callback.
			if err := fn(string(s.B)); err != nil {
				return err
			}

			// Continue after the closing quote, ignoring whitespace.
			str = trimLeadingSpace(str[end+2:])
			if len(str) == 0 {
				// The quoted element was the last one.
				return nil
			}

			// Anything other than a comma after a closing quote is malformed.
			if str[0] != ',' {
				return errors.New("missing comma separator")
			}

			// Step over the separating comma.
			str = str[1:]
			continue
		}

		// Unquoted element: runs up to the next unescaped comma.
		end := s.next(str, ',')

		if end == -1 {
			// No further comma: the remainder of the input is the final
			// element (str is known to be non-empty here).
			return fn(string(s.B))
		}

		if end == 0 {
			// Comma found immediately: empty element, step over it.
			str = str[1:]
			continue
		}

		// Hand the element to the callback.
		if err := fn(string(s.B)); err != nil {
			return err
		}

		// Skip past the element and its trailing comma.
		str = str[end+1:]
	}
}
// next builds the next element into s.B by scanning str up to the first
// unescaped occurrence of c.
//
// Backslashes directly in front of c act as escapes: every pair of them
// collapses into one literal backslash, and a leftover odd backslash escapes
// c so that it is written to the buffer as an ordinary byte instead of ending
// the element. Backslashes in front of any other byte, or trailing at the very
// end of str, are copied through unchanged.
//
// It returns the index within str of the unescaped c that terminated the
// element, or -1 if the end of str was reached without finding one. In both
// cases the bytes collected so far have been appended to s.B.
func (s *Splitter) next(str string, c byte) int {
	// Make sure the buffer can take all of str without regrowing mid-scan.
	if len(str) > cap(s.B)-len(s.B) {
		nb := make([]byte, len(s.B), 2*cap(s.B)+len(str))
		copy(nb, s.B)
		s.B = nb
	}

	// Number of consecutive backslashes seen immediately before str[i].
	var delims int

	for i := 0; i < len(str); i++ {
		// Only count backslashes for now; what they mean
		// depends on the byte that follows them.
		if str[i] == '\\' {
			delims++
			continue
		}

		if str[i] == c {
			// Each pair of backslashes stands for one literal backslash.
			if pairs := delims / 2; pairs > 0 {
				s.B = append(s.B, backslashes(pairs)...)
			}

			// An even run leaves c unescaped, so the element ends here.
			// (Comparing against the halved count instead of checking
			// parity would treat `\\,` as an escaped comma.)
			if delims%2 == 0 {
				return i
			}

			// Odd run: c is escaped and is written below as a literal.
		} else if delims > 0 {
			// Backslashes before any other byte are kept verbatim.
			s.B = append(s.B, backslashes(delims)...)
		}

		// Write byte to buffer and start a fresh backslash run.
		s.B = append(s.B, str[i])
		delims = 0
	}

	// Input ended on a run of backslashes: keep them rather than
	// silently dropping them from the element.
	if delims > 0 {
		s.B = append(s.B, backslashes(delims)...)
	}

	return -1
}
// asciiSpace is a bitset lookup table of the ASCII whitespace characters
// (see: strings.asciiSet). Byte b is marked as a space when bit (b % 32)
// of word (b / 32) is set.
var asciiSpace = func() [8]uint32 {
	var table [8]uint32
	// Tab, newline, vertical tab, form feed, carriage return and space.
	for _, c := range []byte("\t\n\v\f\r ") {
		table[c/32] |= uint32(1) << (c % 32)
	}
	return table
}()
// trimLeadingSpace returns str with its leading whitespace removed.
//
// ASCII bytes are tested against the asciiSpace lookup table. As soon as a
// byte outside the ASCII range is met, the rest of the string is handed to
// trimLeadingSpaceSlow for a rune-aware check.
func trimLeadingSpace(str string) string {
	for i := 0; i < len(str); i++ {
		b := str[i]
		switch {
		// Start of a multi-byte rune: defer to the slower rune check.
		case b >= utf8.RuneSelf:
			return trimLeadingSpaceSlow(str[i:])

		// First non-space ASCII byte: everything from here on is kept.
		case asciiSpace[b/32]&(uint32(1)<<(b%32)) == 0:
			return str[i:]
		}
	}

	// Only ASCII whitespace (or nothing at all) was found.
	return ""
}
// trimLeadingSpaceSlow returns str with all leading Unicode whitespace (as
// defined by unicode.IsSpace) removed. It decodes str rune by rune, so it is
// slower than a plain ASCII byte scan.
//
// A string consisting solely of whitespace yields the empty string; returning
// the input untouched in that case would leave the whitespace untrimmed.
func trimLeadingSpaceSlow(str string) string {
	return strings.TrimLeftFunc(str, unicode.IsSpace)
}
// backslashes returns a string made up of count backslash characters.
// Requests shorter than the pre-built constant are served by slicing it;
// anything longer is delegated to backslashSlow.
func backslashes(count int) string {
	// Pre-built run of backslashes that short requests are sliced from.
	const pool = `\\\\\\\\\\\\\\\\\\\\`

	// Slow-path: too long for the constant, build the string on demand.
	if count >= len(pool) {
		return backslashSlow(count)
	}

	// Fast-path: a prefix of the constant.
	return pool[:count]
}
// backslashSlow assembles a string of count backslashes one byte at a time.
// A count of zero or less produces the empty string.
func backslashSlow(count int) string {
	var sb strings.Builder
	for remaining := count; remaining > 0; remaining-- {
		sb.WriteByte('\\')
	}
	return sb.String()
}