GoToSocial/vendor/codeberg.org/gruf/go-split/splitter.go

package split

import (
	"errors"
	"strings"
	"unicode"
	"unicode/utf8"
)

// Splitter carries a reusable byte buffer so that SplitFunc() can assemble
// elements while keeping allocations to a minimum.
type Splitter struct {
	// B is the scratch buffer elements are built in. SplitFunc() truncates it
	// to zero length before building each element.
	B []byte
}

// SplitFunc walks str as a comma-separated list and hands each element to fn,
// in order. Whitespace in front of every element is skipped. An element may be
// wrapped in single or double quotes, in which case commas inside the quotes
// belong to the element and only whitespace may sit between the closing quote
// and the following comma. Empty unquoted segments (e.g. consecutive commas)
// are skipped without calling fn.
//
// Iteration stops at the first non-nil error returned by fn, which is passed
// back unchanged. A quoted element without a closing quote, or one followed by
// anything other than a comma, produces an error. Every element is assembled
// in s.B, which is truncated before each element is built.
func (s *Splitter) SplitFunc(str string, fn func(string) error) error {
	for {
		// Start every element with an empty (but retained) buffer.
		s.B = s.B[:0]

		// Drop whitespace in front of the element.
		str = trimLeadingSpace(str)
		if str == "" {
			// Nothing left to split.
			return nil
		}

		first := str[0]

		// Empty segment: step over the comma.
		if first == ',' {
			str = str[1:]
			continue
		}

		// Quoted element: the opening quote doubles as the terminator.
		if first == '\'' || first == '"' {
			end := s.next(str[1:], first)
			if end == -1 {
				return errors.New("missing end quote")
			}

			// Hand the unquoted contents to the callback.
			if err := fn(string(s.B)); err != nil {
				return err
			}

			// end is relative to str[1:], so the closing quote
			// sits at end+1 within str. Continue right after it.
			closing := end + 1
			str = trimLeadingSpace(str[closing+1:])
			if str == "" {
				// Quoted element was the last one.
				return nil
			}

			// Only a comma may follow a closing quote.
			if str[0] != ',' {
				return errors.New("missing comma separator")
			}

			// Step over the separating comma.
			str = str[1:]
			continue
		}

		// Unquoted element: runs up to the next unescaped comma.
		end := s.next(str, ',')

		if end == -1 {
			// No further comma, so this is the final element
			// (str is known to be non-empty here).
			return fn(string(s.B))
		}

		if end == 0 {
			// Comma found immediately: nothing to pass on.
			str = str[1:]
			continue
		}

		// Hand the element to the callback.
		if err := fn(string(s.B)); err != nil {
			return err
		}

		// Continue after the element and its comma.
		str = str[end+1:]
	}
}

// next appends the next element of str to s.B, reading up to the first
// unescaped occurrence of c. It returns the index within str of that
// occurrence, or -1 if the end of str was reached without finding one.
//
// A run of backslashes directly in front of c is read as escape pairs: every
// pair contributes one literal backslash to the element, and a leftover (odd)
// backslash escapes c itself, so c is written to the element and scanning
// continues. An even run therefore leaves c unescaped and ends the element.
// Backslashes in front of any other byte are copied through unchanged.
//
// NOTE: a run of backslashes at the very end of str is not written to s.B.
func (s *Splitter) next(str string, c byte) int {
	// Number of consecutive backslashes seen directly before the current byte.
	var delims int

	// Guarantee buf large enough to take all of str without regrowing.
	if len(str) > cap(s.B)-len(s.B) {
		nb := make([]byte, 2*cap(s.B)+len(str))
		copy(nb, s.B)
		s.B = nb[:len(s.B)]
	}

	for i := 0; i < len(str); i++ {
		// Count backslashes; they are only written
		// once we know which byte follows them.
		if str[i] == '\\' {
			delims++
			continue
		}

		if str[i] == c {
			// Each backslash pair is one escaped (literal) backslash.
			if pairs := delims / 2; pairs > 0 {
				s.B = append(s.B, backslashes(pairs)...)
			}

			// No leftover backslash means c is unescaped:
			// the element ends here.
			if delims%2 == 0 {
				return i
			}

			// Otherwise c was escaped and is written below.
		} else if delims > 0 {
			// Backslashes before ordinary bytes are kept verbatim.
			s.B = append(s.B, backslashes(delims)...)
		}

		// Write byte to buffer
		s.B = append(s.B, str[i])

		// Reset backslash run
		delims = 0
	}

	return -1
}

// asciiSpace is a 256-bit set, stored as eight 32-bit words, with one bit set
// for each ASCII whitespace byte: tab, newline, vertical tab, form feed,
// carriage return and space (see: strings.asciiSet). Byte b is a member when
// bit b%32 of word b/32 is set.
var asciiSpace = func() [8]uint32 {
	var table [8]uint32
	for _, c := range []byte("\t\n\v\f\r ") {
		table[c/32] |= 1 << (c % 32)
	}
	return table
}()

// trimLeadingSpace returns str without its leading whitespace. ASCII bytes are
// checked against the asciiSpace bit set; as soon as a non-ASCII byte is met,
// the remainder is handed to trimLeadingSpaceSlow for a rune-aware check.
func trimLeadingSpace(str string) string {
	for i := 0; i < len(str); i++ {
		b := str[i]

		// Beyond the ASCII range: fall back to the slower rune check.
		if b >= utf8.RuneSelf {
			return trimLeadingSpaceSlow(str[i:])
		}

		// First non-space ASCII byte: everything before it is trimmed.
		if asciiSpace[b>>5]&(uint32(1)<<(b&31)) == 0 {
			return str[i:]
		}
	}

	// Only ASCII whitespace (or nothing at all) was present.
	return ""
}

// trimLeadingSpaceSlow returns str without its leading whitespace, decoding
// str rune by rune and testing each with unicode.IsSpace. If str is empty or
// consists of whitespace only, the empty string is returned.
func trimLeadingSpaceSlow(str string) string {
	for i, r := range str {
		if !unicode.IsSpace(r) {
			// First non-space rune: keep from here on.
			return str[i:]
		}
	}

	// Every rune was whitespace, so nothing remains after trimming.
	return ""
}

// backslashes returns a string made of count backslashes. Requests shorter
// than a pre-built constant are served by slicing that constant; anything
// else is delegated to backslashSlow.
func backslashes(count int) string {
	// Pre-built run of backslashes that short requests are sliced from.
	const prebuilt = `\\\\\\\\\\\\\\\\\\\\`

	// Slow-path: too long for the constant, build a custom string.
	if count >= len(prebuilt) {
		return backslashSlow(count)
	}

	// Fast-path: slice the constant.
	return prebuilt[:count]
}

// backslashSlow builds a string of count backslashes. A count of zero or less
// yields the empty string.
func backslashSlow(count int) string {
	if count <= 0 {
		return ""
	}
	return strings.Repeat(`\`, count)
}