bump go-store version (includes minio) (#1657)

Signed-off-by: kim <grufwub@gmail.com>
This commit is contained in:
kim
2023-03-30 22:39:55 +01:00
committed by GitHub
parent 0746ef741a
commit a5c920a50b
33 changed files with 14876 additions and 8512 deletions

View File

@ -13,6 +13,7 @@ import (
"io/ioutil"
"math"
"runtime"
"strconv"
"sync"
)
@ -880,15 +881,20 @@ func (r *Reader) Skip(n int64) error {
// See Reader.ReadSeeker
type ReadSeeker struct {
// Reader is embedded, so its fields and methods are promoted to ReadSeeker.
*Reader
// readAtMu is held for the whole duration of each ReadAt call,
// so that overlapping ReadAt calls run one after another.
readAtMu sync.Mutex
}
// ReadSeeker will return an io.ReadSeeker compatible version of the reader.
// ReadSeeker will return an io.ReadSeeker and io.ReaderAt
// compatible version of the reader.
// If 'random' is specified the returned io.Seeker can be used for
// random seeking, otherwise only forward seeking is supported.
// Enabling random seeking requires the original input to support
// the io.Seeker interface.
// A custom index can be specified which will be used if supplied.
// When using a custom index, it will not be read from the input stream.
// The ReadAt position will affect regular reads and the current position of Seek.
// So using Read after ReadAt will continue from where the ReadAt stopped.
// No functions should be used concurrently.
// The returned ReadSeeker contains a shallow reference to the existing Reader,
// meaning changes performed to one are reflected in the other.
func (r *Reader) ReadSeeker(random bool, index []byte) (*ReadSeeker, error) {
@ -958,42 +964,55 @@ func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
// Reset on EOF
r.err = nil
}
if offset == 0 && whence == io.SeekCurrent {
return r.blockStart + int64(r.i), nil
}
if !r.readHeader {
// Make sure we read the header.
_, r.err = r.Read([]byte{})
}
rs, ok := r.r.(io.ReadSeeker)
if r.index == nil || !ok {
if whence == io.SeekCurrent && offset >= 0 {
err := r.Skip(offset)
return r.blockStart + int64(r.i), err
}
if whence == io.SeekStart && offset >= r.blockStart+int64(r.i) {
err := r.Skip(offset - r.blockStart - int64(r.i))
return r.blockStart + int64(r.i), err
}
return 0, ErrUnsupported
}
// Calculate absolute offset.
absOffset := offset
switch whence {
case io.SeekStart:
case io.SeekCurrent:
offset += r.blockStart + int64(r.i)
absOffset = r.blockStart + int64(r.i) + offset
case io.SeekEnd:
if offset > 0 {
return 0, errors.New("seek after end of file")
if r.index == nil {
return 0, ErrUnsupported
}
offset = r.index.TotalUncompressed + offset
absOffset = r.index.TotalUncompressed + offset
default:
r.err = ErrUnsupported
return 0, r.err
}
if offset < 0 {
if absOffset < 0 {
return 0, errors.New("seek before start of file")
}
c, u, err := r.index.Find(offset)
if !r.readHeader {
// Make sure we read the header.
_, r.err = r.Read([]byte{})
if r.err != nil {
return 0, r.err
}
}
// If we are inside current block no need to seek.
// This includes no offset changes.
if absOffset >= r.blockStart && absOffset < r.blockStart+int64(r.j) {
r.i = int(absOffset - r.blockStart)
return r.blockStart + int64(r.i), nil
}
rs, ok := r.r.(io.ReadSeeker)
if r.index == nil || !ok {
currOffset := r.blockStart + int64(r.i)
if absOffset >= currOffset {
err := r.Skip(absOffset - currOffset)
return r.blockStart + int64(r.i), err
}
return 0, ErrUnsupported
}
// We can seek and we have an index.
c, u, err := r.index.Find(absOffset)
if err != nil {
return r.blockStart + int64(r.i), err
}
@ -1004,12 +1023,57 @@ func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
return 0, err
}
r.i = r.j // Remove rest of current block.
if u < offset {
r.i = r.j // Remove rest of current block.
r.blockStart = u - int64(r.j) // Adjust current block start for accounting.
if u < absOffset {
// Forward inside block
return offset, r.Skip(offset - u)
return absOffset, r.Skip(absOffset - u)
}
return offset, nil
if u > absOffset {
return 0, fmt.Errorf("s2 seek: (internal error) u (%d) > absOffset (%d)", u, absOffset)
}
return absOffset, nil
}
// ReadAt fills p with decompressed data starting at the absolute
// uncompressed position offset, giving ReadSeeker the method set of
// io.ReaderAt.
//
// It returns the number of bytes stored in p (0 <= n <= len(p)).
// Whenever n < len(p) the returned error is non-nil and explains why
// the read stopped early; io.EOF is reported this way when the stream
// ends before p is full. A failure to position the stream is returned
// with n == 0.
//
// The call is implemented as a Seek to offset followed by repeated
// Read calls, so it moves the position used by subsequent Read and
// Seek calls: a Read issued afterwards continues where ReadAt stopped.
//
// ReadAt calls are serialized against each other by an internal mutex.
// NOTE(review): that mutex is only taken here, so mixing ReadAt with
// concurrent Read or Seek calls is presumably still unsafe - confirm.
func (r *ReadSeeker) ReadAt(p []byte, offset int64) (int, error) {
	r.readAtMu.Lock()
	defer r.readAtMu.Unlock()

	// Position the stream first; nothing has been read if this fails.
	if _, err := r.Seek(offset, io.SeekStart); err != nil {
		return 0, err
	}

	// A single Read may return fewer bytes than requested,
	// so keep reading until p is full or an error (including io.EOF) occurs.
	filled := 0
	for filled < len(p) {
		got, err := r.Read(p[filled:])
		filled += got
		if err != nil {
			return filled, err
		}
	}
	return filled, nil
}
// ReadByte satisfies the io.ByteReader interface.
@ -1048,3 +1112,370 @@ func (r *Reader) SkippableCB(id uint8, fn func(r io.Reader) error) error {
r.skippableCB[id] = fn
return nil
}
// s2DecodeDict writes the decoding of src to dst. It assumes that the varint-encoded
// length of the decompressed bytes has already been read, and that len(dst)
// equals that length.
//
// When dict is nil the call is forwarded to s2Decode. Otherwise, copy
// operations whose offset reaches back before the start of dst take their
// bytes from dict.dict instead of from dst.
//
// The input is processed in two passes over the same tag format: a fast loop
// that runs while more than 5 bytes of src remain, and a second loop that
// handles the tail and checks every advance of the read position against
// len(src).
//
// It returns 0 on success or a decodeErrCodeXxx error code on failure.
func s2DecodeDict(dst, src []byte, dict *Dict) int {
if dict == nil {
return s2Decode(dst, src)
}
// Compile-time switches for diagnostic output; both are disabled.
const debug = false
const debugErrs = debug
if debug {
fmt.Println("Starting decode, dst len:", len(dst))
}
// d is the write position in dst, s the read position in src and
// length the size of the literal run or copy currently being decoded.
var d, s, length int
// Initial offset, used if the stream starts with a repeat operation.
// With d == 0 the dictionary copy below then starts at dict.dict[dict.repeat].
offset := len(dict.dict) - dict.repeat
// As long as we can read at least 5 bytes...
for s < len(src)-5 {
// Removing bounds checks is SLOWER when doing
// in := src[s:s+5]
// Checked on Go 1.18
// The two lowest bits of the tag byte select the operation.
switch src[s] & 0x03 {
case tagLiteral:
// The upper 6 bits hold length-1 when below 60;
// 60..63 mean that 1..4 little-endian length bytes follow the tag.
x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
x = uint32(src[s-1])
case x == 61:
in := src[s : s+3]
x = uint32(in[1]) | uint32(in[2])<<8
s += 3
case x == 62:
in := src[s : s+4]
// Load as 32 bit and shift down.
x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
x >>= 8
s += 4
case x == 63:
in := src[s : s+5]
x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24
s += 5
}
length = int(x) + 1
if debug {
fmt.Println("literals, length:", length, "d-after:", d+length)
}
// Reject literals that do not fit in the remaining dst or src.
// With a 32-bit int, int(x)+1 can wrap around to a non-positive value.
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debugErrs {
fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s)
}
return decodeErrCodeCorrupt
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
// 11-bit offset: bits 5-7 of the tag byte become bits 8-10, the next
// byte supplies bits 0-7. (& and << share precedence and group left.)
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
// Bits 2-4 of the tag byte.
length = int(src[s-2]) >> 2 & 0x7
// A zero offset marks a repeat: the previous offset is reused.
if toffset == 0 {
if debug {
fmt.Print("(repeat) ")
}
// keep last offset
// Length codes 5, 6 and 7 take the length from 1, 2 or 3 extra bytes.
switch length {
case 5:
length = int(src[s]) + 4
s += 1
case 6:
in := src[s : s+2]
length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8)
s += 2
case 7:
in := src[s : s+3]
length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16)
s += 3
default: // 0-> 4
}
} else {
offset = toffset
}
// Applied to both the repeat and the regular tagCopy1 form.
length += 4
case tagCopy2:
// Tag byte followed by a 16-bit little-endian offset.
in := src[s : s+3]
offset = int(uint32(in[1]) | uint32(in[2])<<8)
length = 1 + int(in[0])>>2
s += 3
case tagCopy4:
// Tag byte followed by a 32-bit little-endian offset.
in := src[s : s+5]
offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24)
length = 1 + int(in[0])>>2
s += 5
}
// Only copy operations reach this point; literals continue above.
if offset <= 0 || length > len(dst)-d {
if debugErrs {
fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d)
}
return decodeErrCodeCorrupt
}
// copy from dict
// d < offset means the copy source starts before the beginning of dst.
if d < offset {
if d > MaxDictSrcOffset {
if debugErrs {
fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length)
}
return decodeErrCodeCorrupt
}
// Start of the copy inside the dictionary. The whole copy must lie
// within dict.dict; it is not continued into dst.
startOff := len(dict.dict) - offset + d
if startOff < 0 || startOff+length > len(dict.dict) {
if debugErrs {
fmt.Printf("offset (%d) + length (%d) bigger than dict (%d)\n", offset, length, len(dict.dict))
}
return decodeErrCodeCorrupt
}
if debug {
fmt.Println("dict copy, length:", length, "offset:", offset, "d-after:", d+length, "dict start offset:", startOff)
}
copy(dst[d:d+length], dict.dict[startOff:])
d += length
continue
}
if debug {
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
}
// Copy from an earlier sub-slice of dst to a later sub-slice.
// If no overlap, use the built-in copy:
if offset > length {
copy(dst[d:d+length], dst[d-offset:])
d += length
continue
}
// Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
//
// We align the slices into a and b and show the compiler they are the same size.
// This allows the loop to run without bounds checks.
a := dst[d : d+length]
b := dst[d-offset:]
b = b[:len(a)]
for i := range a {
a[i] = b[i]
}
d += length
}
// Remaining with extra checks...
// Same decoding as the loop above, but s is advanced first and checked
// against len(src) before any of the operand bytes are read.
for s < len(src) {
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-1])
case x == 61:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-2]) | uint32(src[s-1])<<8
case x == 62:
s += 4
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
case x == 63:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
}
length = int(x) + 1
// Same validation as in the fast loop, including the 32-bit wrap-around case.
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debugErrs {
fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s)
}
return decodeErrCodeCorrupt
}
if debug {
fmt.Println("literals, length:", length, "d-after:", d+length)
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(src[s-2]) >> 2 & 0x7
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
// A zero offset marks a repeat: the previous offset is reused.
if toffset == 0 {
if debug {
fmt.Print("(repeat) ")
}
// keep last offset
switch length {
case 5:
s += 1
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-1])) + 4
case 6:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
case 7:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
default: // 0-> 4
}
} else {
offset = toffset
}
length += 4
case tagCopy2:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-3])>>2
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
case tagCopy4:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-5])>>2
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
}
// Only copy operations reach this point; literals continue above.
if offset <= 0 || length > len(dst)-d {
if debugErrs {
fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d)
}
return decodeErrCodeCorrupt
}
// copy from dict
// d < offset means the copy source starts before the beginning of dst.
if d < offset {
if d > MaxDictSrcOffset {
if debugErrs {
fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length)
}
return decodeErrCodeCorrupt
}
// rOff is the start of the copy inside the dictionary; it is the same
// value the fast loop computes as startOff.
rOff := len(dict.dict) - (offset - d)
if debug {
fmt.Println("starting dict entry from dict offset", len(dict.dict)-rOff)
}
if rOff+length > len(dict.dict) {
if debugErrs {
fmt.Println("err: END offset", rOff+length, "bigger than dict", len(dict.dict), "dict offset:", rOff, "length:", length)
}
return decodeErrCodeCorrupt
}
if rOff < 0 {
if debugErrs {
fmt.Println("err: START offset", rOff, "less than 0", len(dict.dict), "dict offset:", rOff, "length:", length)
}
return decodeErrCodeCorrupt
}
copy(dst[d:d+length], dict.dict[rOff:])
d += length
continue
}
if debug {
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
}
// Copy from an earlier sub-slice of dst to a later sub-slice.
// If no overlap, use the built-in copy:
if offset > length {
copy(dst[d:d+length], dst[d-offset:])
d += length
continue
}
// Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
//
// We align the slices into a and b and show the compiler they are the same size.
// This allows the loop to run without bounds checks.
a := dst[d : d+length]
b := dst[d-offset:]
b = b[:len(a)]
for i := range a {
a[i] = b[i]
}
d += length
}
// All input is consumed; the output must have been filled exactly.
if d != len(dst) {
if debugErrs {
fmt.Println("wanted length", len(dst), "got", d)
}
return decodeErrCodeCorrupt
}
return 0
}