[feature] add per-uri dereferencer locks (#2291)

2025-06-05 21:59:39 +02:00 · 2023-10-31 11:12:22 +00:00
parent 51d0a0bba5
commit ce71a5a790
54 changed files with 2432 additions and 2719 deletions
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@ -7,6 +7,7 @@ package flate

 import (
 	"encoding/binary"
+	"errors"
 	"fmt"
 	"io"
 	"math"
@ -833,6 +834,12 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
 		d.initDeflate()
 		d.fill = (*compressor).fillDeflate
 		d.step = (*compressor).deflateLazy
+	case -level >= MinCustomWindowSize && -level <= MaxCustomWindowSize:
+		d.w.logNewTablePenalty = 7
+		d.fast = &fastEncL5Window{maxOffset: int32(-level), cur: maxStoreBlockSize}
+		d.window = make([]byte, maxStoreBlockSize)
+		d.fill = (*compressor).fillBlock
+		d.step = (*compressor).storeFast
 	default:
 		return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
 	}
@ -929,6 +936,28 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
 	return zw, err
 }

+// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow.
+const MinCustomWindowSize = 32
+
+// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow.
+const MaxCustomWindowSize = windowSize
+
+// NewWriterWindow returns a new Writer compressing data with a custom window size.
+// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize (inclusive);
+// any other value yields a nil Writer and a non-nil error.
+//
+// The window size is handed to the compressor as a negative level, which is
+// how init selects the custom-window encoder.
+func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) {
+	if windowSize < MinCustomWindowSize {
+		return nil, errors.New("flate: requested window size less than MinCustomWindowSize")
+	}
+	if windowSize > MaxCustomWindowSize {
+		return nil, errors.New("flate: requested window size bigger than MaxCustomWindowSize")
+	}
+	var dw Writer
+	if err := dw.d.init(w, -windowSize); err != nil {
+		return nil, err
+	}
+	return &dw, nil
+}
+
 // A Writer takes data written to it and writes the compressed
 // form of that data to an underlying writer (see NewWriter).
 type Writer struct {
--- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go
+++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
@ -8,7 +8,6 @@ package flate
 import (
 	"encoding/binary"
 	"fmt"
-	"math/bits"
 )

 type fastEnc interface {
@ -192,25 +191,3 @@ func (e *fastGen) Reset() {
 	}
 	e.hist = e.hist[:0]
 }
-
-// matchLen returns the maximum length.
-// 'a' must be the shortest of the two.
-func matchLen(a, b []byte) int {
-	var checked int
-
-	for len(a) >= 8 {
-		if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
-			return checked + (bits.TrailingZeros64(diff) >> 3)
-		}
-		checked += 8
-		a = a[8:]
-		b = b[8:]
-	}
-	b = b[:len(a)]
-	for i := range a {
-		if a[i] != b[i] {
-			return i + checked
-		}
-	}
-	return len(a) + checked
-}
--- a/vendor/github.com/klauspost/compress/flate/inflate.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate.go
@ -120,8 +120,9 @@ func (h *huffmanDecoder) init(lengths []int) bool {
 	const sanity = false

 	if h.chunks == nil {
-		h.chunks = &[huffmanNumChunks]uint16{}
+		h.chunks = new([huffmanNumChunks]uint16)
 	}
+
 	if h.maxRead != 0 {
 		*h = huffmanDecoder{chunks: h.chunks, links: h.links}
 	}
@ -175,6 +176,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
 	}

 	h.maxRead = min
+
 	chunks := h.chunks[:]
 	for i := range chunks {
 		chunks[i] = 0
@ -202,8 +204,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
 			if cap(h.links[off]) < numLinks {
 				h.links[off] = make([]uint16, numLinks)
 			} else {
-				links := h.links[off][:0]
-				h.links[off] = links[:numLinks]
+				h.links[off] = h.links[off][:numLinks]
 			}
 		}
 	} else {
@ -277,7 +278,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
 	return true
 }

-// The actual read interface needed by NewReader.
+// Reader is the actual read interface needed by NewReader.
 // If the passed in io.Reader does not also have ReadByte,
 // the NewReader will introduce its own buffering.
 type Reader interface {
@ -285,6 +286,18 @@ type Reader interface {
 	io.ByteReader
 }

+// step identifies which decompressor method doStep should run next.
+type step uint8
+
+const (
+	// Values start at 1 (iota + 1), so the zero value of step matches no
+	// case in doStep and ends up in its panic branch.
+	copyData step = iota + 1
+	nextBlock
+	huffmanBytesBuffer
+	huffmanBytesReader
+	huffmanBufioReader
+	huffmanStringsReader
+	huffmanGenericReader
+)
+
 // Decompress state.
 type decompressor struct {
 	// Input source.
@ -303,7 +316,7 @@ type decompressor struct {

 	// Next step in the decompression,
 	// and decompression state.
-	step      func(*decompressor)
+	step      step
 	stepState int
 	err       error
 	toRead    []byte
@ -342,7 +355,7 @@ func (f *decompressor) nextBlock() {
 		// compressed, fixed Huffman tables
 		f.hl = &fixedHuffmanDecoder
 		f.hd = nil
-		f.huffmanBlockDecoder()()
+		f.huffmanBlockDecoder()
 		if debugDecode {
 			fmt.Println("predefinied huffman block")
 		}
@ -353,7 +366,7 @@ func (f *decompressor) nextBlock() {
 		}
 		f.hl = &f.h1
 		f.hd = &f.h2
-		f.huffmanBlockDecoder()()
+		f.huffmanBlockDecoder()
 		if debugDecode {
 			fmt.Println("dynamic huffman block")
 		}
@ -379,14 +392,16 @@ func (f *decompressor) Read(b []byte) (int, error) {
 		if f.err != nil {
 			return 0, f.err
 		}
-		f.step(f)
+
+		f.doStep()
+
 		if f.err != nil && len(f.toRead) == 0 {
 			f.toRead = f.dict.readFlush() // Flush what's left in case of error
 		}
 	}
 }

-// Support the io.WriteTo interface for io.Copy and friends.
+// WriteTo implements the io.WriteTo interface for io.Copy and friends.
 func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
 	total := int64(0)
 	flushed := false
@ -410,7 +425,7 @@ func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
 			return total, f.err
 		}
 		if f.err == nil {
-			f.step(f)
+			f.doStep()
 		}
 		if len(f.toRead) == 0 && f.err != nil && !flushed {
 			f.toRead = f.dict.readFlush() // Flush what's left in case of error
@ -631,7 +646,7 @@ func (f *decompressor) copyData() {

 	if f.dict.availWrite() == 0 || f.copyLen > 0 {
 		f.toRead = f.dict.readFlush()
-		f.step = (*decompressor).copyData
+		f.step = copyData
 		return
 	}
 	f.finishBlock()
@ -644,7 +659,28 @@ func (f *decompressor) finishBlock() {
 		}
 		f.err = io.EOF
 	}
-	f.step = (*decompressor).nextBlock
+	f.step = nextBlock
+}
+
+// doStep runs the decompressor method selected by f.step.
+// Each step constant maps to the method of the same name.
+// It panics if f.step holds a value that has no matching case.
+func (f *decompressor) doStep() {
+	switch f.step {
+	case copyData:
+		f.copyData()
+	case nextBlock:
+		f.nextBlock()
+	case huffmanBytesBuffer:
+		f.huffmanBytesBuffer()
+	case huffmanBytesReader:
+		f.huffmanBytesReader()
+	case huffmanBufioReader:
+		f.huffmanBufioReader()
+	case huffmanStringsReader:
+		f.huffmanStringsReader()
+	case huffmanGenericReader:
+		f.huffmanGenericReader()
+	default:
+		panic("BUG: unexpected step state")
+	}
 }

 // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
@ -747,7 +783,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error {
 		h1:       f.h1,
 		h2:       f.h2,
 		dict:     f.dict,
-		step:     (*decompressor).nextBlock,
+		step:     nextBlock,
 	}
 	f.dict.init(maxMatchOffset, dict)
 	return nil
@ -768,7 +804,7 @@ func NewReader(r io.Reader) io.ReadCloser {
 	f.r = makeReader(r)
 	f.bits = new([maxNumLit + maxNumDist]int)
 	f.codebits = new([numCodes]int)
-	f.step = (*decompressor).nextBlock
+	f.step = nextBlock
 	f.dict.init(maxMatchOffset, nil)
 	return &f
 }
@ -787,7 +823,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
 	f.r = makeReader(r)
 	f.bits = new([maxNumLit + maxNumDist]int)
 	f.codebits = new([numCodes]int)
-	f.step = (*decompressor).nextBlock
+	f.step = nextBlock
 	f.dict.init(maxMatchOffset, dict)
 	return &f
 }
--- a/vendor/github.com/klauspost/compress/flate/inflate_gen.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go
@ -85,7 +85,7 @@ readLiteral:
 			dict.writeByte(byte(v))
 			if dict.availWrite() == 0 {
 				f.toRead = dict.readFlush()
-				f.step = (*decompressor).huffmanBytesBuffer
+				f.step = huffmanBytesBuffer
 				f.stepState = stateInit
 				f.b, f.nb = fb, fnb
 				return
@ -251,7 +251,7 @@ copyHistory:

 		if dict.availWrite() == 0 || f.copyLen > 0 {
 			f.toRead = dict.readFlush()
-			f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work
+			f.step = huffmanBytesBuffer // We need to continue this work
 			f.stepState = stateDict
 			f.b, f.nb = fb, fnb
 			return
@ -336,7 +336,7 @@ readLiteral:
 			dict.writeByte(byte(v))
 			if dict.availWrite() == 0 {
 				f.toRead = dict.readFlush()
-				f.step = (*decompressor).huffmanBytesReader
+				f.step = huffmanBytesReader
 				f.stepState = stateInit
 				f.b, f.nb = fb, fnb
 				return
@ -502,7 +502,7 @@ copyHistory:

 		if dict.availWrite() == 0 || f.copyLen > 0 {
 			f.toRead = dict.readFlush()
-			f.step = (*decompressor).huffmanBytesReader // We need to continue this work
+			f.step = huffmanBytesReader // We need to continue this work
 			f.stepState = stateDict
 			f.b, f.nb = fb, fnb
 			return
@ -587,7 +587,7 @@ readLiteral:
 			dict.writeByte(byte(v))
 			if dict.availWrite() == 0 {
 				f.toRead = dict.readFlush()
-				f.step = (*decompressor).huffmanBufioReader
+				f.step = huffmanBufioReader
 				f.stepState = stateInit
 				f.b, f.nb = fb, fnb
 				return
@ -753,7 +753,7 @@ copyHistory:

 		if dict.availWrite() == 0 || f.copyLen > 0 {
 			f.toRead = dict.readFlush()
-			f.step = (*decompressor).huffmanBufioReader // We need to continue this work
+			f.step = huffmanBufioReader // We need to continue this work
 			f.stepState = stateDict
 			f.b, f.nb = fb, fnb
 			return
@ -838,7 +838,7 @@ readLiteral:
 			dict.writeByte(byte(v))
 			if dict.availWrite() == 0 {
 				f.toRead = dict.readFlush()
-				f.step = (*decompressor).huffmanStringsReader
+				f.step = huffmanStringsReader
 				f.stepState = stateInit
 				f.b, f.nb = fb, fnb
 				return
@ -1004,7 +1004,7 @@ copyHistory:

 		if dict.availWrite() == 0 || f.copyLen > 0 {
 			f.toRead = dict.readFlush()
-			f.step = (*decompressor).huffmanStringsReader // We need to continue this work
+			f.step = huffmanStringsReader // We need to continue this work
 			f.stepState = stateDict
 			f.b, f.nb = fb, fnb
 			return
@ -1089,7 +1089,7 @@ readLiteral:
 			dict.writeByte(byte(v))
 			if dict.availWrite() == 0 {
 				f.toRead = dict.readFlush()
-				f.step = (*decompressor).huffmanGenericReader
+				f.step = huffmanGenericReader
 				f.stepState = stateInit
 				f.b, f.nb = fb, fnb
 				return
@ -1255,7 +1255,7 @@ copyHistory:

 		if dict.availWrite() == 0 || f.copyLen > 0 {
 			f.toRead = dict.readFlush()
-			f.step = (*decompressor).huffmanGenericReader // We need to continue this work
+			f.step = huffmanGenericReader // We need to continue this work
 			f.stepState = stateDict
 			f.b, f.nb = fb, fnb
 			return
@ -1265,19 +1265,19 @@ copyHistory:
 	// Not reached
 }

-func (f *decompressor) huffmanBlockDecoder() func() {
+func (f *decompressor) huffmanBlockDecoder() {
 	switch f.r.(type) {
 	case *bytes.Buffer:
-		return f.huffmanBytesBuffer
+		f.huffmanBytesBuffer()
 	case *bytes.Reader:
-		return f.huffmanBytesReader
+		f.huffmanBytesReader()
 	case *bufio.Reader:
-		return f.huffmanBufioReader
+		f.huffmanBufioReader()
 	case *strings.Reader:
-		return f.huffmanStringsReader
+		f.huffmanStringsReader()
 	case Reader:
-		return f.huffmanGenericReader
+		f.huffmanGenericReader()
 	default:
-		return f.huffmanGenericReader
+		f.huffmanGenericReader()
 	}
 }
--- a/vendor/github.com/klauspost/compress/flate/level5.go
+++ b/vendor/github.com/klauspost/compress/flate/level5.go
@ -308,3 +308,401 @@ emitRemainder:
 		emitLiteral(dst, src[nextEmit:])
 	}
 }
+
+// fastEncL5Window is a level 5 encoder,
+// but with a custom window size.
+type fastEncL5Window struct {
+	// hist holds previous input followed by the block being encoded;
+	// addBlock appends each new block to it.
+	// cur is added to positions in hist before they are stored in the tables
+	// and subtracted again when entries are read back.
+	// maxOffset is the largest match distance Encode will emit.
+	// table is indexed by the short hash (hashLen over hashShortBytes bytes).
+	// bTable is indexed by the long hash (hash7) and keeps the current and the
+	// previous entry for each bucket.
+	hist      []byte
+	cur       int32
+	maxOffset int32
+	table     [tableSize]tableEntry
+	bTable    [tableSize]tableEntryPrev
+}
+
+// Encode appends src to the encoder history and adds literal and match
+// tokens for it to dst. Match distances are limited to e.maxOffset.
+//
+// If src is shorter than minNonLiteralBlockSize no tokens are produced;
+// dst.n is set to len(src) and the caller is expected to handle the block.
+// If no match was found at all, dst is left without tokens as well.
+func (e *fastEncL5Window) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashShortBytes         = 4
+	)
+	maxMatchOffset := e.maxOffset
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			for i := range e.bTable[:] {
+				e.bTable[i] = tableEntryPrev{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.table[i].offset = v
+		}
+		for i := range e.bTable[:] {
+			v := e.bTable[i]
+			if v.Cur.offset <= minOff {
+				v.Cur.offset = 0
+				v.Prev.offset = 0
+			} else {
+				v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
+				if v.Prev.offset <= minOff {
+					v.Prev.offset = 0
+				} else {
+					v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
+				}
+			}
+			e.bTable[i] = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	// s is the position in e.hist where the newly added block starts.
+	s := e.addBlock(src)
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// cv is the value loaded at s that the short and long hashes are computed from.
+	cv := load6432(src, s)
+	for {
+		const skipLog = 6
+		const doEvery = 1
+
+		nextS := s
+		var l int32
+		var t int32
+		for {
+			nextHashS := hashLen(cv, tableBits, hashShortBytes)
+			nextHashL := hash7(cv, tableBits)
+
+			s = nextS
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			// Fetch a short+long candidate
+			sCandidate := e.table[nextHashS]
+			lCandidate := e.bTable[nextHashL]
+			next := load6432(src, nextS)
+			entry := tableEntry{offset: s + e.cur}
+			e.table[nextHashS] = entry
+			eLong := &e.bTable[nextHashL]
+			eLong.Cur, eLong.Prev = entry, eLong.Cur
+
+			nextHashS = hashLen(next, tableBits, hashShortBytes)
+			nextHashL = hash7(next, tableBits)
+
+			t = lCandidate.Cur.offset - e.cur
+			if s-t < maxMatchOffset {
+				if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
+					// Store the next match
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+					eLong := &e.bTable[nextHashL]
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+					t2 := lCandidate.Prev.offset - e.cur
+					if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+						l = e.matchlen(s+4, t+4, src) + 4
+						ml1 := e.matchlen(s+4, t2+4, src) + 4
+						if ml1 > l {
+							t = t2
+							l = ml1
+							break
+						}
+					}
+					break
+				}
+				t = lCandidate.Prev.offset - e.cur
+				if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+					// Store the next match
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+					eLong := &e.bTable[nextHashL]
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+					break
+				}
+			}
+
+			t = sCandidate.offset - e.cur
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
+				// Found a 4 match...
+				l = e.matchlen(s+4, t+4, src) + 4
+				lCandidate = e.bTable[nextHashL]
+				// Store the next match
+
+				e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+				eLong := &e.bTable[nextHashL]
+				eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+				// If the next long is a candidate, use that...
+				t2 := lCandidate.Cur.offset - e.cur
+				if nextS-t2 < maxMatchOffset {
+					if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
+						ml := e.matchlen(nextS+4, t2+4, src) + 4
+						if ml > l {
+							t = t2
+							s = nextS
+							l = ml
+							break
+						}
+					}
+					// If the previous long is a candidate, use that...
+					t2 = lCandidate.Prev.offset - e.cur
+					if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
+						ml := e.matchlen(nextS+4, t2+4, src) + 4
+						if ml > l {
+							t = t2
+							s = nextS
+							l = ml
+							break
+						}
+					}
+				}
+				break
+			}
+			cv = next
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+
+		if l == 0 {
+			// Extend the 4-byte match as long as possible.
+			l = e.matchlenLong(s+4, t+4, src) + 4
+		} else if l == maxMatchLength {
+			l += e.matchlenLong(s+l, t+l, src)
+		}
+
+		// Try to locate a better match by checking the end of best match...
+		if sAt := s + l; l < 30 && sAt < sLimit {
+			// Allow some bytes at the beginning to mismatch.
+			// Sweet spot is 2/3 bytes depending on input.
+			// 3 is only a little better when it is but sometimes a lot worse.
+			// The skipped bytes are tested in Extend backwards,
+			// and still picked up as part of the match if they do.
+			const skipBeginning = 2
+			eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
+			t2 := eLong - e.cur - l + skipBeginning
+			s2 := s + skipBeginning
+			off := s2 - t2
+			if t2 >= 0 && off < maxMatchOffset && off > 0 {
+				if l2 := e.matchlenLong(s2, t2, src); l2 > l {
+					t = t2
+					l = l2
+					s = s2
+				}
+			}
+		}
+
+		// Extend backwards
+		for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+			s--
+			t--
+			l++
+		}
+		if nextEmit < s {
+			// The emitLiteral call is disabled; literals are added inline instead.
+			if false {
+				emitLiteral(dst, src[nextEmit:s])
+			} else {
+				for _, v := range src[nextEmit:s] {
+					dst.tokens[dst.n] = token(v)
+					dst.litHist[v]++
+					dst.n++
+				}
+			}
+		}
+		if debugDeflate {
+			if t >= s {
+				panic(fmt.Sprintln("s-t", s, t))
+			}
+			if (s - t) > maxMatchOffset {
+				panic(fmt.Sprintln("mmo", s-t))
+			}
+			if l < baseMatchLength {
+				panic("bml")
+			}
+		}
+
+		dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+		s += l
+		nextEmit = s
+		if nextS >= s {
+			s = nextS + 1
+		}
+
+		if s >= sLimit {
+			goto emitRemainder
+		}
+
+		// Store every 3rd hash in-between.
+		if true {
+			const hashEvery = 3
+			i := s - l + 1
+			if i < s-1 {
+				cv := load6432(src, i)
+				t := tableEntry{offset: i + e.cur}
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = t
+				eLong := &e.bTable[hash7(cv, tableBits)]
+				eLong.Cur, eLong.Prev = t, eLong.Cur
+
+				// Do a long at i+1
+				cv >>= 8
+				t = tableEntry{offset: t.offset + 1}
+				eLong = &e.bTable[hash7(cv, tableBits)]
+				eLong.Cur, eLong.Prev = t, eLong.Cur
+
+				// We only have enough bits for a short entry at i+2
+				cv >>= 8
+				t = tableEntry{offset: t.offset + 1}
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = t
+
+				// Skip one - otherwise we risk hitting 's'
+				i += 4
+				for ; i < s-1; i += hashEvery {
+					cv := load6432(src, i)
+					t := tableEntry{offset: i + e.cur}
+					t2 := tableEntry{offset: t.offset + 1}
+					eLong := &e.bTable[hash7(cv, tableBits)]
+					eLong.Cur, eLong.Prev = t, eLong.Cur
+					e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
+				}
+			}
+		}
+
+		// We could immediately start working at s now, but to improve
+		// compression we first update the hash table at s-1 and at s.
+		x := load6432(src, s-1)
+		o := e.cur + s - 1
+		prevHashS := hashLen(x, tableBits, hashShortBytes)
+		prevHashL := hash7(x, tableBits)
+		e.table[prevHashS] = tableEntry{offset: o}
+		eLong := &e.bTable[prevHashL]
+		eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
+		cv = x >> 8
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
+
+// Reset the encoding table.
+// The hash tables themselves are not cleared here; instead e.cur is advanced
+// and the history is truncated to zero length while keeping its capacity.
+func (e *fastEncL5Window) Reset() {
+	// We keep the same allocs, since we are compressing the same block sizes.
+	if cap(e.hist) < allocHistory {
+		e.hist = make([]byte, 0, allocHistory)
+	}
+
+	// We offset current position so everything will be out of reach.
+	// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
+	if e.cur <= int32(bufferReset) {
+		e.cur += e.maxOffset + int32(len(e.hist))
+	}
+	e.hist = e.hist[:0]
+}
+
+// addBlock appends src to e.hist and returns the position in e.hist at which
+// src starts.
+//
+// If src does not fit in the remaining capacity, the last e.maxOffset bytes of
+// history are moved to the front of the buffer and e.cur is advanced by the
+// number of bytes dropped. It panics if the existing buffer capacity is
+// smaller than twice e.maxOffset.
+func (e *fastEncL5Window) addBlock(src []byte) int32 {
+	// check if we have space already
+	maxMatchOffset := e.maxOffset
+
+	if len(e.hist)+len(src) > cap(e.hist) {
+		if cap(e.hist) == 0 {
+			// First use: allocate the history buffer.
+			e.hist = make([]byte, 0, allocHistory)
+		} else {
+			if cap(e.hist) < int(maxMatchOffset*2) {
+				panic("unexpected buffer size")
+			}
+			// Move down
+			offset := int32(len(e.hist)) - maxMatchOffset
+			copy(e.hist[0:maxMatchOffset], e.hist[offset:])
+			// Advance e.cur by the bytes dropped from the front of hist.
+			e.cur += offset
+			e.hist = e.hist[:maxMatchOffset]
+		}
+	}
+	s := int32(len(e.hist))
+	e.hist = append(e.hist, src...)
+	return s
+}
+
+// matchlen will return the match length between offsets s and t in src.
+// The maximum length returned is maxMatchLength - 4.
+// It is assumed that s > t, that t >= 0 and s < len(src).
+//
+// These assumptions, and that the distance s-t does not exceed e.maxOffset,
+// are only verified when debugDeflate is enabled.
+func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 {
+	if debugDeflate {
+		if t >= s {
+			panic(fmt.Sprint("t >=s:", t, s))
+		}
+		if int(s) >= len(src) {
+			panic(fmt.Sprint("s >= len(src):", s, len(src)))
+		}
+		if t < 0 {
+			panic(fmt.Sprint("t < 0:", t))
+		}
+		if s-t > e.maxOffset {
+			panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxOffset (", e.maxOffset, ")"))
+		}
+	}
+	// Cap the compared range so that at most maxMatchLength - 4 bytes are matched.
+	s1 := int(s) + maxMatchLength - 4
+	if s1 > len(src) {
+		s1 = len(src)
+	}
+
+	// Extend the match to be as long as possible.
+	return int32(matchLen(src[s:s1], src[t:]))
+}
+
+// matchlenLong will return the match length between offsets s and t in src.
+// Unlike matchlen, the compared range is not capped; it runs to the end of src.
+// It is assumed that s > t, that t >= 0 and s < len(src).
+//
+// These assumptions, and that the distance s-t does not exceed e.maxOffset,
+// are only verified when debugDeflate is enabled.
+func (e *fastEncL5Window) matchlenLong(s, t int32, src []byte) int32 {
+	if debugDeflate {
+		if t >= s {
+			panic(fmt.Sprint("t >=s:", t, s))
+		}
+		if int(s) >= len(src) {
+			panic(fmt.Sprint("s >= len(src):", s, len(src)))
+		}
+		if t < 0 {
+			panic(fmt.Sprint("t < 0:", t))
+		}
+		if s-t > e.maxOffset {
+			panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxOffset (", e.maxOffset, ")"))
+		}
+	}
+	// Extend the match to be as long as possible.
+	return int32(matchLen(src[s:], src[t:]))
+}
--- a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go
+++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go
@ -0,0 +1,16 @@
+//go:build amd64 && !appengine && !noasm && gc
+// +build amd64,!appengine,!noasm,gc
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package flate
+
+// matchLen returns how many bytes match in a and b
+//
+// It assumes that:
+//
+//	len(a) <= len(b) and len(a) > 0
+//
+//go:noescape
+func matchLen(a []byte, b []byte) int
--- a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s
+++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s
@ -0,0 +1,68 @@
+// Copied from S2 implementation.
+
+//go:build !appengine && !noasm && gc && !noasm
+
+#include "textflag.h"
+
+// func matchLen(a []byte, b []byte) int
+// Requires: BMI
+// (TZCNTQ is only assembled when GOAMD64_v3 is defined; otherwise BSFQ is used.)
+//
+// Register use:
+//   AX = base of a, CX = base of b
+//   DX = bytes of a still to compare
+//   SI = number of matching bytes found so far (the return value)
+TEXT ·matchLen(SB), NOSPLIT, $0-56
+	MOVQ a_base+0(FP), AX
+	MOVQ b_base+24(FP), CX
+	MOVQ a_len+8(FP), DX
+
+	// matchLen
+	XORL SI, SI
+	CMPL DX, $0x08
+	JB   matchlen_match4_standalone
+
+matchlen_loopback_standalone:
+	// Compare 8 bytes at a time; a zero XOR means all 8 bytes are equal.
+	MOVQ  (AX)(SI*1), BX
+	XORQ  (CX)(SI*1), BX
+	TESTQ BX, BX
+	JZ    matchlen_loop_standalone
+
+	// Index of the lowest set bit of the XOR, shifted right by 3,
+	// is the number of equal bytes within this 8-byte word.
+#ifdef GOAMD64_v3
+	TZCNTQ BX, BX
+#else
+	BSFQ BX, BX
+#endif
+	SARQ $0x03, BX
+	LEAL (SI)(BX*1), SI
+	JMP  gen_match_len_end
+
+matchlen_loop_standalone:
+	// All 8 bytes matched: advance and loop while at least 8 bytes remain.
+	LEAL -8(DX), DX
+	LEAL 8(SI), SI
+	CMPL DX, $0x08
+	JAE  matchlen_loopback_standalone
+
+matchlen_match4_standalone:
+	// Fewer than 8 bytes remain: try 4, then 2, then 1 byte.
+	// A mismatch falls through to the next smaller compare at the same offset.
+	CMPL DX, $0x04
+	JB   matchlen_match2_standalone
+	MOVL (AX)(SI*1), BX
+	CMPL (CX)(SI*1), BX
+	JNE  matchlen_match2_standalone
+	LEAL -4(DX), DX
+	LEAL 4(SI), SI
+
+matchlen_match2_standalone:
+	CMPL DX, $0x02
+	JB   matchlen_match1_standalone
+	MOVW (AX)(SI*1), BX
+	CMPW (CX)(SI*1), BX
+	JNE  matchlen_match1_standalone
+	LEAL -2(DX), DX
+	LEAL 2(SI), SI
+
+matchlen_match1_standalone:
+	CMPL DX, $0x01
+	JB   gen_match_len_end
+	MOVB (AX)(SI*1), BL
+	CMPB (CX)(SI*1), BL
+	JNE  gen_match_len_end
+	INCL SI
+
+gen_match_len_end:
+	// Store the match count as the result.
+	MOVQ SI, ret+48(FP)
+	RET
--- a/vendor/github.com/klauspost/compress/flate/matchlen_generic.go
+++ b/vendor/github.com/klauspost/compress/flate/matchlen_generic.go
@ -0,0 +1,33 @@
+//go:build !amd64 || appengine || !gc || noasm
+// +build !amd64 appengine !gc noasm
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package flate
+
+import (
+	"encoding/binary"
+	"math/bits"
+)
+
+// matchLen returns the maximum common prefix length of a and b.
+// a must be the shortest of the two.
+// The byte-wise tail loop indexes b with indices taken from a, so a b that is
+// shorter than the remaining a would index out of range.
+func matchLen(a, b []byte) (n int) {
+	// Compare 8 bytes at a time while both slices have at least 8 bytes left.
+	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
+		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
+		if diff != 0 {
+			// The lowest set bit of the XOR marks the first differing byte
+			// (little-endian load), so bit index / 8 is the count of equal bytes.
+			return n + bits.TrailingZeros64(diff)>>3
+		}
+		n += 8
+	}
+
+	// Compare what is left of a one byte at a time.
+	for i := range a {
+		if a[i] != b[i] {
+			break
+		}
+		n++
+	}
+	return n
+
+}
--- a/vendor/github.com/klauspost/compress/gzip/gunzip.go
+++ b/vendor/github.com/klauspost/compress/gzip/gunzip.go
@ -106,6 +106,7 @@ func (z *Reader) Reset(r io.Reader) error {
 	*z = Reader{
 		decompressor: z.decompressor,
 		multistream:  true,
+		br:           z.br,
 	}
 	if rr, ok := r.(flate.Reader); ok {
 		z.r = rr
--- a/vendor/github.com/klauspost/compress/gzip/gzip.go
+++ b/vendor/github.com/klauspost/compress/gzip/gzip.go
@ -74,6 +74,27 @@ func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
 	return z, nil
 }

+// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow.
+const MinCustomWindowSize = flate.MinCustomWindowSize
+
+// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow.
+const MaxCustomWindowSize = flate.MaxCustomWindowSize
+
+// NewWriterWindow returns a new Writer compressing data with a custom window size.
+// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize (inclusive);
+// any other value yields a nil Writer and a non-nil error.
+//
+// The window size is passed to init as a negative compression level.
+func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) {
+	if windowSize < MinCustomWindowSize {
+		return nil, errors.New("gzip: requested window size less than MinCustomWindowSize")
+	}
+	if windowSize > MaxCustomWindowSize {
+		return nil, errors.New("gzip: requested window size bigger than MaxCustomWindowSize")
+	}
+
+	z := new(Writer)
+	z.init(w, -windowSize)
+	return z, nil
+}
+
 func (z *Writer) init(w io.Writer, level int) {
 	compressor := z.compressor
 	if level != StatelessCompression {
--- a/vendor/github.com/klauspost/compress/s2/dict.go
+++ b/vendor/github.com/klauspost/compress/s2/dict.go
@ -106,6 +106,25 @@ func MakeDict(data []byte, searchStart []byte) *Dict {
 	return &d
 }

+// MakeDictManual will create a dictionary.
+// 'data' must be at least MinDictSize and less than or equal to MaxDictSize.
+// A manual first repeat index into data must be provided.
+// It must be less than len(data)-8.
+//
+// If any of these requirements is not met, nil is returned.
+//
+// The returned Dict references data directly when data has at least 16 bytes
+// of spare capacity; otherwise the content is copied into a new buffer that
+// has that spare capacity.
+func MakeDictManual(data []byte, firstIdx uint16) *Dict {
+	if len(data) < MinDictSize || int(firstIdx) >= len(data)-8 || len(data) > MaxDictSize {
+		return nil
+	}
+	var d Dict
+	dict := data
+	d.dict = dict
+	// Make sure there are 16 bytes of capacity beyond the dictionary content.
+	if cap(d.dict) < len(d.dict)+16 {
+		d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
+	}
+
+	d.repeat = int(firstIdx)
+	return &d
+}
+
 // Encode returns the encoded form of src. The returned slice may be a sub-
 // slice of dst if dst was large enough to hold the entire encoded block.
 // Otherwise, a newly allocated slice will be returned.
--- a/vendor/github.com/klauspost/compress/s2/encode.go
+++ b/vendor/github.com/klauspost/compress/s2/encode.go
@ -57,7 +57,7 @@ func Encode(dst, src []byte) []byte {
 // The function returns -1 if no improvement could be achieved.
 // Using actual compression will most often produce better compression than the estimate.
 func EstimateBlockSize(src []byte) (d int) {
-	if len(src) < 6 || int64(len(src)) > 0xffffffff {
+	if len(src) <= inputMargin || int64(len(src)) > 0xffffffff {
 		return -1
 	}
 	if len(src) <= 1024 {
--- a/vendor/github.com/klauspost/compress/s2/encode_best.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_best.go
@ -157,6 +157,9 @@ func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
 				return m
 			}
 			matchDict := func(candidate, s int, first uint32, rep bool) match {
+				if s >= MaxDictSrcOffset {
+					return match{offset: candidate, s: s}
+				}
 				// Calculate offset as if in continuous array with s
 				offset := -len(dict.dict) + candidate
 				if best.length != 0 && best.s-best.offset == s-offset && !rep {
--- a/vendor/github.com/klauspost/compress/s2/encode_go.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_go.go
@ -316,6 +316,7 @@ func matchLen(a []byte, b []byte) int {
 	return len(a) + checked
 }

+// input must be > inputMargin
 func calcBlockSize(src []byte) (d int) {
 	// Initialize the hash table.
 	const (
@ -501,6 +502,7 @@ emitRemainder:
 	return d
 }

+// length must be > inputMargin.
 func calcBlockSizeSmall(src []byte) (d int) {
 	// Initialize the hash table.
 	const (
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
+++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
--- a/vendor/github.com/klauspost/compress/s2/index.go
+++ b/vendor/github.com/klauspost/compress/s2/index.go
@ -511,24 +511,22 @@ func IndexStream(r io.Reader) ([]byte, error) {

 // JSON returns the index as JSON text.
 func (i *Index) JSON() []byte {
+	type offset struct {
+		CompressedOffset   int64 `json:"compressed"`
+		UncompressedOffset int64 `json:"uncompressed"`
+	}
 	x := struct {
-		TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
-		TotalCompressed   int64 `json:"total_compressed"`   // Total Compressed size if known. Will be -1 if unknown.
-		Offsets           []struct {
-			CompressedOffset   int64 `json:"compressed"`
-			UncompressedOffset int64 `json:"uncompressed"`
-		} `json:"offsets"`
-		EstBlockUncomp int64 `json:"est_block_uncompressed"`
+		TotalUncompressed int64    `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
+		TotalCompressed   int64    `json:"total_compressed"`   // Total Compressed size if known. Will be -1 if unknown.
+		Offsets           []offset `json:"offsets"`
+		EstBlockUncomp    int64    `json:"est_block_uncompressed"`
 	}{
 		TotalUncompressed: i.TotalUncompressed,
 		TotalCompressed:   i.TotalCompressed,
 		EstBlockUncomp:    i.estBlockUncomp,
 	}
 	for _, v := range i.info {
-		x.Offsets = append(x.Offsets, struct {
-			CompressedOffset   int64 `json:"compressed"`
-			UncompressedOffset int64 `json:"uncompressed"`
-		}{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
+		x.Offsets = append(x.Offsets, offset{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
 	}
 	b, _ := json.MarshalIndent(x, "", "  ")
 	return b