diff --git a/go.mod b/go.mod index 17ac0fd7d..c692215d9 100644 --- a/go.mod +++ b/go.mod @@ -57,7 +57,7 @@ require ( github.com/ncruces/go-sqlite3 v0.25.0 github.com/oklog/ulid v1.3.1 github.com/pquerna/otp v1.4.0 - github.com/prometheus/client_golang v1.21.1 + github.com/prometheus/client_golang v1.22.0 github.com/rivo/uniseg v0.4.7 github.com/spf13/cobra v1.9.1 github.com/spf13/viper v1.20.1 diff --git a/go.sum b/go.sum index 450b72a67..a963526b9 100644 --- a/go.sum +++ b/go.sum @@ -348,8 +348,8 @@ github.com/pquerna/otp v1.4.0 h1:wZvl1TIVxKRThZIBiwOOHOGP/1+nZyWBil9Y2XNEDzg= github.com/pquerna/otp v1.4.0/go.mod h1:dkJfzwRKNiegxyNb54X/3fLwhCynbMspSyWKnvi1AEg= github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= -github.com/prometheus/client_golang v1.21.1 h1:DOvXXTqVzvkIewV/CDPFdejpMCGeMcbGCQ8YOmu+Ibk= -github.com/prometheus/client_golang v1.21.1/go.mod h1:U9NM32ykUErtVBxdvD3zfi+EuFkkaBvMb09mIfe0Zgg= +github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= +github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= diff --git a/vendor/github.com/klauspost/compress/.gitattributes b/vendor/github.com/klauspost/compress/.gitattributes deleted file mode 100644 index 402433593..000000000 --- a/vendor/github.com/klauspost/compress/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -* -text -*.bin -text -diff diff --git a/vendor/github.com/klauspost/compress/.gitignore b/vendor/github.com/klauspost/compress/.gitignore deleted file mode 100644 index d31b37815..000000000 --- a/vendor/github.com/klauspost/compress/.gitignore +++ /dev/null @@ -1,32 +0,0 @@ -# Compiled Object files, Static and Dynamic libs (Shared Objects) -*.o -*.a -*.so - -# Folders -_obj -_test - -# Architecture specific extensions/prefixes -*.[568vq] -[568vq].out - -*.cgo1.go -*.cgo2.c -_cgo_defun.c -_cgo_gotypes.go -_cgo_export.* - -_testmain.go - -*.exe -*.test -*.prof -/s2/cmd/_s2sx/sfx-exe - -# Linux perf files -perf.data -perf.data.old - -# gdb history -.gdb_history diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml deleted file mode 100644 index 4528059ca..000000000 --- a/vendor/github.com/klauspost/compress/.goreleaser.yml +++ /dev/null @@ -1,123 +0,0 @@ -version: 2 - -before: - hooks: - - ./gen.sh - -builds: - - - id: "s2c" - binary: s2c - main: ./s2/cmd/s2c/main.go - flags: - - -trimpath - env: - - CGO_ENABLED=0 - goos: - - aix - - linux - - freebsd - - netbsd - - windows - - darwin - goarch: - - 386 - - amd64 - - arm - - arm64 - - ppc64 - - ppc64le - - mips64 - - mips64le - goarm: - - 7 - - - id: "s2d" - binary: s2d - main: ./s2/cmd/s2d/main.go - flags: - - -trimpath - env: - - CGO_ENABLED=0 - goos: - - aix - - linux - - freebsd - - netbsd - - windows - - darwin - goarch: - - 386 - - amd64 - - arm - - arm64 - - ppc64 - - ppc64le - - mips64 - - mips64le - goarm: - - 7 - - - id: "s2sx" - binary: s2sx - main: ./s2/cmd/_s2sx/main.go - flags: - - -modfile=s2sx.mod - - -trimpath - env: - - CGO_ENABLED=0 - goos: - - aix - - linux - - freebsd - - netbsd - - windows - - darwin - goarch: - - 386 - - amd64 - - arm - - arm64 - - ppc64 - - ppc64le - - mips64 - - mips64le - goarm: - - 7 - -archives: - - - id: s2-binaries - name_template: "s2-{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}" - format_overrides: - - goos: windows - format: zip - files: - - unpack/* - - s2/LICENSE - - s2/README.md -checksum: - name_template: 'checksums.txt' -snapshot: - version_template: "{{ .Tag }}-next" -changelog: - sort: asc - filters: - exclude: - - '^doc:' - - '^docs:' - - '^test:' - - '^tests:' - - '^Update\sREADME.md' - -nfpms: - - - file_name_template: "s2_package__{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}" - vendor: Klaus Post - homepage: https://github.com/klauspost/compress - maintainer: Klaus Post - description: S2 Compression Tool - license: BSD 3-Clause - formats: - - deb - - rpm diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md deleted file mode 100644 index 244ee19c4..000000000 --- a/vendor/github.com/klauspost/compress/README.md +++ /dev/null @@ -1,671 +0,0 @@ -# compress - -This package provides various compression algorithms. - -* [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression in pure Go. -* [S2](https://github.com/klauspost/compress/tree/master/s2#s2-compression) is a high performance replacement for Snappy. -* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a dropin replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib). -* [snappy](https://github.com/klauspost/compress/tree/master/snappy) is a drop-in replacement for `github.com/golang/snappy` offering better compression and concurrent streams. -* [huff0](https://github.com/klauspost/compress/tree/master/huff0) and [FSE](https://github.com/klauspost/compress/tree/master/fse) implementations for raw entropy encoding. -* [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped requests efficiently. -* [pgzip](https://github.com/klauspost/pgzip) is a separate package that provides a very fast parallel gzip implementation. - -[![Go Reference](https://pkg.go.dev/badge/klauspost/compress.svg)](https://pkg.go.dev/github.com/klauspost/compress?tab=subdirectories) -[![Go](https://github.com/klauspost/compress/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/compress/actions/workflows/go.yml) -[![Sourcegraph Badge](https://sourcegraph.com/github.com/klauspost/compress/-/badge.svg)](https://sourcegraph.com/github.com/klauspost/compress?badge) - -# package usage - -Use `go get github.com/klauspost/compress@latest` to add it to your project. - -This package will support the current Go version and 2 versions back. - -* Use the `nounsafe` tag to disable all use of the "unsafe" package. -* Use the `noasm` tag to disable all assembly across packages. - -Use the links above for more information on each. - -# changelog - -* Feb 19th, 2025 - [1.18.0](https://github.com/klauspost/compress/releases/tag/v1.18.0) - * Add unsafe little endian loaders https://github.com/klauspost/compress/pull/1036 - * fix: check `r.err != nil` but return a nil value error `err` by @alingse in https://github.com/klauspost/compress/pull/1028 - * flate: Simplify L4-6 loading https://github.com/klauspost/compress/pull/1043 - * flate: Simplify matchlen (remove asm) https://github.com/klauspost/compress/pull/1045 - * s2: Improve small block compression speed w/o asm https://github.com/klauspost/compress/pull/1048 - * flate: Fix matchlen L5+L6 https://github.com/klauspost/compress/pull/1049 - * flate: Cleanup & reduce casts https://github.com/klauspost/compress/pull/1050 - -* Oct 11th, 2024 - [1.17.11](https://github.com/klauspost/compress/releases/tag/v1.17.11) - * zstd: Fix extra CRC written with multiple Close calls https://github.com/klauspost/compress/pull/1017 - * s2: Don't use stack for index tables https://github.com/klauspost/compress/pull/1014 - * gzhttp: No content-type on no body response code by @juliens in https://github.com/klauspost/compress/pull/1011 - * gzhttp: Do not set the content-type when response has no body by @kevinpollet in https://github.com/klauspost/compress/pull/1013 - -* Sep 23rd, 2024 - [1.17.10](https://github.com/klauspost/compress/releases/tag/v1.17.10) - * gzhttp: Add TransportAlwaysDecompress option. https://github.com/klauspost/compress/pull/978 - * gzhttp: Add supported decompress request body by @mirecl in https://github.com/klauspost/compress/pull/1002 - * s2: Add EncodeBuffer buffer recycling callback https://github.com/klauspost/compress/pull/982 - * zstd: Improve memory usage on small streaming encodes https://github.com/klauspost/compress/pull/1007 - * flate: read data written with partial flush by @vajexal in https://github.com/klauspost/compress/pull/996 - -* Jun 12th, 2024 - [1.17.9](https://github.com/klauspost/compress/releases/tag/v1.17.9) - * s2: Reduce ReadFrom temporary allocations https://github.com/klauspost/compress/pull/949 - * flate, zstd: Shave some bytes off amd64 matchLen by @greatroar in https://github.com/klauspost/compress/pull/963 - * Upgrade zip/zlib to 1.22.4 upstream https://github.com/klauspost/compress/pull/970 https://github.com/klauspost/compress/pull/971 - * zstd: BuildDict fails with RLE table https://github.com/klauspost/compress/pull/951 - -* Apr 9th, 2024 - [1.17.8](https://github.com/klauspost/compress/releases/tag/v1.17.8) - * zstd: Reject blocks where reserved values are not 0 https://github.com/klauspost/compress/pull/885 - * zstd: Add RLE detection+encoding https://github.com/klauspost/compress/pull/938 - -* Feb 21st, 2024 - [1.17.7](https://github.com/klauspost/compress/releases/tag/v1.17.7) - * s2: Add AsyncFlush method: Complete the block without flushing by @Jille in https://github.com/klauspost/compress/pull/927 - * s2: Fix literal+repeat exceeds dst crash https://github.com/klauspost/compress/pull/930 - -* Feb 5th, 2024 - [1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6) - * zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923 - * s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925 - -* Jan 26th, 2024 - [v1.17.5](https://github.com/klauspost/compress/releases/tag/v1.17.5) - * flate: Fix reset with dictionary on custom window encodes https://github.com/klauspost/compress/pull/912 - * zstd: Add Frame header encoding and stripping https://github.com/klauspost/compress/pull/908 - * zstd: Limit better/best default window to 8MB https://github.com/klauspost/compress/pull/913 - * zstd: Speed improvements by @greatroar in https://github.com/klauspost/compress/pull/896 https://github.com/klauspost/compress/pull/910 - * s2: Fix callbacks for skippable blocks and disallow 0xfe (Padding) by @Jille in https://github.com/klauspost/compress/pull/916 https://github.com/klauspost/compress/pull/917 -https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/compress/pull/918 - -* Dec 1st, 2023 - [v1.17.4](https://github.com/klauspost/compress/releases/tag/v1.17.4) - * huff0: Speed up symbol counting by @greatroar in https://github.com/klauspost/compress/pull/887 - * huff0: Remove byteReader by @greatroar in https://github.com/klauspost/compress/pull/886 - * gzhttp: Allow overriding decompression on transport https://github.com/klauspost/compress/pull/892 - * gzhttp: Clamp compression level https://github.com/klauspost/compress/pull/890 - * gzip: Error out if reserved bits are set https://github.com/klauspost/compress/pull/891 - -* Nov 15th, 2023 - [v1.17.3](https://github.com/klauspost/compress/releases/tag/v1.17.3) - * fse: Fix max header size https://github.com/klauspost/compress/pull/881 - * zstd: Improve better/best compression https://github.com/klauspost/compress/pull/877 - * gzhttp: Fix missing content type on Close https://github.com/klauspost/compress/pull/883 - -* Oct 22nd, 2023 - [v1.17.2](https://github.com/klauspost/compress/releases/tag/v1.17.2) - * zstd: Fix rare *CORRUPTION* output in "best" mode. See https://github.com/klauspost/compress/pull/876 - -* Oct 14th, 2023 - [v1.17.1](https://github.com/klauspost/compress/releases/tag/v1.17.1) - * s2: Fix S2 "best" dictionary wrong encoding https://github.com/klauspost/compress/pull/871 - * flate: Reduce allocations in decompressor and minor code improvements by @fakefloordiv in https://github.com/klauspost/compress/pull/869 - * s2: Fix EstimateBlockSize on 6&7 length input https://github.com/klauspost/compress/pull/867 - -* Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0) - * Add experimental dictionary builder https://github.com/klauspost/compress/pull/853 - * Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838 - * flate: Add limited window compression https://github.com/klauspost/compress/pull/843 - * s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839 - * flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837 - * gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860 - -
- See changes to v1.16.x - - -* July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7) - * zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829 - * s2: add GetBufferCapacity() method by @GiedriusS in https://github.com/klauspost/compress/pull/832 - -* June 13, 2023 - [v1.16.6](https://github.com/klauspost/compress/releases/tag/v1.16.6) - * zstd: correctly ignore WithEncoderPadding(1) by @ianlancetaylor in https://github.com/klauspost/compress/pull/806 - * zstd: Add amd64 match length assembly https://github.com/klauspost/compress/pull/824 - * gzhttp: Handle informational headers by @rtribotte in https://github.com/klauspost/compress/pull/815 - * s2: Improve Better compression slightly https://github.com/klauspost/compress/pull/663 - -* Apr 16, 2023 - [v1.16.5](https://github.com/klauspost/compress/releases/tag/v1.16.5) - * zstd: readByte needs to use io.ReadFull by @jnoxon in https://github.com/klauspost/compress/pull/802 - * gzip: Fix WriterTo after initial read https://github.com/klauspost/compress/pull/804 - -* Apr 5, 2023 - [v1.16.4](https://github.com/klauspost/compress/releases/tag/v1.16.4) - * zstd: Improve zstd best efficiency by @greatroar and @klauspost in https://github.com/klauspost/compress/pull/784 - * zstd: Respect WithAllLitEntropyCompression https://github.com/klauspost/compress/pull/792 - * zstd: Fix amd64 not always detecting corrupt data https://github.com/klauspost/compress/pull/785 - * zstd: Various minor improvements by @greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795 - * s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779 - * s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780 - * gzhttp: Support ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799 - -* Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1) - * zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776 - * gzhttp: Add optional [BREACH mitigation](https://github.com/klauspost/compress/tree/master/gzhttp#breach-mitigation). https://github.com/klauspost/compress/pull/762 https://github.com/klauspost/compress/pull/768 https://github.com/klauspost/compress/pull/769 https://github.com/klauspost/compress/pull/770 https://github.com/klauspost/compress/pull/767 - * s2: Add Intel LZ4s converter https://github.com/klauspost/compress/pull/766 - * zstd: Minor bug fixes https://github.com/klauspost/compress/pull/771 https://github.com/klauspost/compress/pull/772 https://github.com/klauspost/compress/pull/773 - * huff0: Speed up compress1xDo by @greatroar in https://github.com/klauspost/compress/pull/774 - -* Feb 26, 2023 - [v1.16.0](https://github.com/klauspost/compress/releases/tag/v1.16.0) - * s2: Add [Dictionary](https://github.com/klauspost/compress/tree/master/s2#dictionaries) support. https://github.com/klauspost/compress/pull/685 - * s2: Add Compression Size Estimate. https://github.com/klauspost/compress/pull/752 - * s2: Add support for custom stream encoder. https://github.com/klauspost/compress/pull/755 - * s2: Add LZ4 block converter. https://github.com/klauspost/compress/pull/748 - * s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747 - * s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746 -
- -
- See changes to v1.15.x - -* Jan 21st, 2023 (v1.15.15) - * deflate: Improve level 7-9 https://github.com/klauspost/compress/pull/739 - * zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728 - * zstd: Various speed improvements by @greatroar https://github.com/klauspost/compress/pull/741 https://github.com/klauspost/compress/pull/734 https://github.com/klauspost/compress/pull/736 https://github.com/klauspost/compress/pull/744 https://github.com/klauspost/compress/pull/743 https://github.com/klauspost/compress/pull/745 - * gzhttp: Add SuffixETag() and DropETag() options to prevent ETag collisions on compressed responses by @willbicks in https://github.com/klauspost/compress/pull/740 - -* Jan 3rd, 2023 (v1.15.14) - - * flate: Improve speed in big stateless blocks https://github.com/klauspost/compress/pull/718 - * zstd: Minor speed tweaks by @greatroar in https://github.com/klauspost/compress/pull/716 https://github.com/klauspost/compress/pull/720 - * export NoGzipResponseWriter for custom ResponseWriter wrappers by @harshavardhana in https://github.com/klauspost/compress/pull/722 - * s2: Add example for indexing and existing stream https://github.com/klauspost/compress/pull/723 - -* Dec 11, 2022 (v1.15.13) - * zstd: Add [MaxEncodedSize](https://pkg.go.dev/github.com/klauspost/compress@v1.15.13/zstd#Encoder.MaxEncodedSize) to encoder https://github.com/klauspost/compress/pull/691 - * zstd: Various tweaks and improvements https://github.com/klauspost/compress/pull/693 https://github.com/klauspost/compress/pull/695 https://github.com/klauspost/compress/pull/696 https://github.com/klauspost/compress/pull/701 https://github.com/klauspost/compress/pull/702 https://github.com/klauspost/compress/pull/703 https://github.com/klauspost/compress/pull/704 https://github.com/klauspost/compress/pull/705 https://github.com/klauspost/compress/pull/706 https://github.com/klauspost/compress/pull/707 https://github.com/klauspost/compress/pull/708 - -* Oct 26, 2022 (v1.15.12) - - * zstd: Tweak decoder allocs. https://github.com/klauspost/compress/pull/680 - * gzhttp: Always delete `HeaderNoCompression` https://github.com/klauspost/compress/pull/683 - -* Sept 26, 2022 (v1.15.11) - - * flate: Improve level 1-3 compression https://github.com/klauspost/compress/pull/678 - * zstd: Improve "best" compression by @nightwolfz in https://github.com/klauspost/compress/pull/677 - * zstd: Fix+reduce decompression allocations https://github.com/klauspost/compress/pull/668 - * zstd: Fix non-effective noescape tag https://github.com/klauspost/compress/pull/667 - -* Sept 16, 2022 (v1.15.10) - - * zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649 - * Add Go 1.19 - deprecate Go 1.16 https://github.com/klauspost/compress/pull/651 - * flate: Improve level 5+6 compression https://github.com/klauspost/compress/pull/656 - * zstd: Improve "better" compression https://github.com/klauspost/compress/pull/657 - * s2: Improve "best" compression https://github.com/klauspost/compress/pull/658 - * s2: Improve "better" compression. https://github.com/klauspost/compress/pull/635 - * s2: Slightly faster non-assembly decompression https://github.com/klauspost/compress/pull/646 - * Use arrays for constant size copies https://github.com/klauspost/compress/pull/659 - -* July 21, 2022 (v1.15.9) - - * zstd: Fix decoder crash on amd64 (no BMI) on invalid input https://github.com/klauspost/compress/pull/645 - * zstd: Disable decoder extended memory copies (amd64) due to possible crashes https://github.com/klauspost/compress/pull/644 - * zstd: Allow single segments up to "max decoded size" https://github.com/klauspost/compress/pull/643 - -* July 13, 2022 (v1.15.8) - - * gzip: fix stack exhaustion bug in Reader.Read https://github.com/klauspost/compress/pull/641 - * s2: Add Index header trim/restore https://github.com/klauspost/compress/pull/638 - * zstd: Optimize seqdeq amd64 asm by @greatroar in https://github.com/klauspost/compress/pull/636 - * zstd: Improve decoder memcopy https://github.com/klauspost/compress/pull/637 - * huff0: Pass a single bitReader pointer to asm by @greatroar in https://github.com/klauspost/compress/pull/634 - * zstd: Branchless getBits for amd64 w/o BMI2 by @greatroar in https://github.com/klauspost/compress/pull/640 - * gzhttp: Remove header before writing https://github.com/klauspost/compress/pull/639 - -* June 29, 2022 (v1.15.7) - - * s2: Fix absolute forward seeks https://github.com/klauspost/compress/pull/633 - * zip: Merge upstream https://github.com/klauspost/compress/pull/631 - * zip: Re-add zip64 fix https://github.com/klauspost/compress/pull/624 - * zstd: translate fseDecoder.buildDtable into asm by @WojciechMula in https://github.com/klauspost/compress/pull/598 - * flate: Faster histograms https://github.com/klauspost/compress/pull/620 - * deflate: Use compound hcode https://github.com/klauspost/compress/pull/622 - -* June 3, 2022 (v1.15.6) - * s2: Improve coding for long, close matches https://github.com/klauspost/compress/pull/613 - * s2c: Add Snappy/S2 stream recompression https://github.com/klauspost/compress/pull/611 - * zstd: Always use configured block size https://github.com/klauspost/compress/pull/605 - * zstd: Fix incorrect hash table placement for dict encoding in default https://github.com/klauspost/compress/pull/606 - * zstd: Apply default config to ZipDecompressor without options https://github.com/klauspost/compress/pull/608 - * gzhttp: Exclude more common archive formats https://github.com/klauspost/compress/pull/612 - * s2: Add ReaderIgnoreCRC https://github.com/klauspost/compress/pull/609 - * s2: Remove sanity load on index creation https://github.com/klauspost/compress/pull/607 - * snappy: Use dedicated function for scoring https://github.com/klauspost/compress/pull/614 - * s2c+s2d: Use official snappy framed extension https://github.com/klauspost/compress/pull/610 - -* May 25, 2022 (v1.15.5) - * s2: Add concurrent stream decompression https://github.com/klauspost/compress/pull/602 - * s2: Fix final emit oob read crash on amd64 https://github.com/klauspost/compress/pull/601 - * huff0: asm implementation of Decompress1X by @WojciechMula https://github.com/klauspost/compress/pull/596 - * zstd: Use 1 less goroutine for stream decoding https://github.com/klauspost/compress/pull/588 - * zstd: Copy literal in 16 byte blocks when possible https://github.com/klauspost/compress/pull/592 - * zstd: Speed up when WithDecoderLowmem(false) https://github.com/klauspost/compress/pull/599 - * zstd: faster next state update in BMI2 version of decode by @WojciechMula in https://github.com/klauspost/compress/pull/593 - * huff0: Do not check max size when reading table. https://github.com/klauspost/compress/pull/586 - * flate: Inplace hashing for level 7-9 https://github.com/klauspost/compress/pull/590 - - -* May 11, 2022 (v1.15.4) - * huff0: decompress directly into output by @WojciechMula in [#577](https://github.com/klauspost/compress/pull/577) - * inflate: Keep dict on stack [#581](https://github.com/klauspost/compress/pull/581) - * zstd: Faster decoding memcopy in asm [#583](https://github.com/klauspost/compress/pull/583) - * zstd: Fix ignored crc [#580](https://github.com/klauspost/compress/pull/580) - -* May 5, 2022 (v1.15.3) - * zstd: Allow to ignore checksum checking by @WojciechMula [#572](https://github.com/klauspost/compress/pull/572) - * s2: Fix incorrect seek for io.SeekEnd in [#575](https://github.com/klauspost/compress/pull/575) - -* Apr 26, 2022 (v1.15.2) - * zstd: Add x86-64 assembly for decompression on streams and blocks. Contributed by [@WojciechMula](https://github.com/WojciechMula). Typically 2x faster. [#528](https://github.com/klauspost/compress/pull/528) [#531](https://github.com/klauspost/compress/pull/531) [#545](https://github.com/klauspost/compress/pull/545) [#537](https://github.com/klauspost/compress/pull/537) - * zstd: Add options to ZipDecompressor and fixes [#539](https://github.com/klauspost/compress/pull/539) - * s2: Use sorted search for index [#555](https://github.com/klauspost/compress/pull/555) - * Minimum version is Go 1.16, added CI test on 1.18. - -* Mar 11, 2022 (v1.15.1) - * huff0: Add x86 assembly of Decode4X by @WojciechMula in [#512](https://github.com/klauspost/compress/pull/512) - * zstd: Reuse zip decoders in [#514](https://github.com/klauspost/compress/pull/514) - * zstd: Detect extra block data and report as corrupted in [#520](https://github.com/klauspost/compress/pull/520) - * zstd: Handle zero sized frame content size stricter in [#521](https://github.com/klauspost/compress/pull/521) - * zstd: Add stricter block size checks in [#523](https://github.com/klauspost/compress/pull/523) - -* Mar 3, 2022 (v1.15.0) - * zstd: Refactor decoder [#498](https://github.com/klauspost/compress/pull/498) - * zstd: Add stream encoding without goroutines [#505](https://github.com/klauspost/compress/pull/505) - * huff0: Prevent single blocks exceeding 16 bits by @klauspost in[#507](https://github.com/klauspost/compress/pull/507) - * flate: Inline literal emission [#509](https://github.com/klauspost/compress/pull/509) - * gzhttp: Add zstd to transport [#400](https://github.com/klauspost/compress/pull/400) - * gzhttp: Make content-type optional [#510](https://github.com/klauspost/compress/pull/510) - -Both compression and decompression now supports "synchronous" stream operations. This means that whenever "concurrency" is set to 1, they will operate without spawning goroutines. - -Stream decompression is now faster on asynchronous, since the goroutine allocation much more effectively splits the workload. On typical streams this will typically use 2 cores fully for decompression. When a stream has finished decoding no goroutines will be left over, so decoders can now safely be pooled and still be garbage collected. - -While the release has been extensively tested, it is recommended to testing when upgrading. - -
- -
- See changes to v1.14.x - -* Feb 22, 2022 (v1.14.4) - * flate: Fix rare huffman only (-2) corruption. [#503](https://github.com/klauspost/compress/pull/503) - * zip: Update deprecated CreateHeaderRaw to correctly call CreateRaw by @saracen in [#502](https://github.com/klauspost/compress/pull/502) - * zip: don't read data descriptor early by @saracen in [#501](https://github.com/klauspost/compress/pull/501) #501 - * huff0: Use static decompression buffer up to 30% faster [#499](https://github.com/klauspost/compress/pull/499) [#500](https://github.com/klauspost/compress/pull/500) - -* Feb 17, 2022 (v1.14.3) - * flate: Improve fastest levels compression speed ~10% more throughput. [#482](https://github.com/klauspost/compress/pull/482) [#489](https://github.com/klauspost/compress/pull/489) [#490](https://github.com/klauspost/compress/pull/490) [#491](https://github.com/klauspost/compress/pull/491) [#494](https://github.com/klauspost/compress/pull/494) [#478](https://github.com/klauspost/compress/pull/478) - * flate: Faster decompression speed, ~5-10%. [#483](https://github.com/klauspost/compress/pull/483) - * s2: Faster compression with Go v1.18 and amd64 microarch level 3+. [#484](https://github.com/klauspost/compress/pull/484) [#486](https://github.com/klauspost/compress/pull/486) - -* Jan 25, 2022 (v1.14.2) - * zstd: improve header decoder by @dsnet [#476](https://github.com/klauspost/compress/pull/476) - * zstd: Add bigger default blocks [#469](https://github.com/klauspost/compress/pull/469) - * zstd: Remove unused decompression buffer [#470](https://github.com/klauspost/compress/pull/470) - * zstd: Fix logically dead code by @ningmingxiao [#472](https://github.com/klauspost/compress/pull/472) - * flate: Improve level 7-9 [#471](https://github.com/klauspost/compress/pull/471) [#473](https://github.com/klauspost/compress/pull/473) - * zstd: Add noasm tag for xxhash [#475](https://github.com/klauspost/compress/pull/475) - -* Jan 11, 2022 (v1.14.1) - * s2: Add stream index in [#462](https://github.com/klauspost/compress/pull/462) - * flate: Speed and efficiency improvements in [#439](https://github.com/klauspost/compress/pull/439) [#461](https://github.com/klauspost/compress/pull/461) [#455](https://github.com/klauspost/compress/pull/455) [#452](https://github.com/klauspost/compress/pull/452) [#458](https://github.com/klauspost/compress/pull/458) - * zstd: Performance improvement in [#420]( https://github.com/klauspost/compress/pull/420) [#456](https://github.com/klauspost/compress/pull/456) [#437](https://github.com/klauspost/compress/pull/437) [#467](https://github.com/klauspost/compress/pull/467) [#468](https://github.com/klauspost/compress/pull/468) - * zstd: add arm64 xxhash assembly in [#464](https://github.com/klauspost/compress/pull/464) - * Add garbled for binaries for s2 in [#445](https://github.com/klauspost/compress/pull/445) -
- -
- See changes to v1.13.x - -* Aug 30, 2021 (v1.13.5) - * gz/zlib/flate: Alias stdlib errors [#425](https://github.com/klauspost/compress/pull/425) - * s2: Add block support to commandline tools [#413](https://github.com/klauspost/compress/pull/413) - * zstd: pooledZipWriter should return Writers to the same pool [#426](https://github.com/klauspost/compress/pull/426) - * Removed golang/snappy as external dependency for tests [#421](https://github.com/klauspost/compress/pull/421) - -* Aug 12, 2021 (v1.13.4) - * Add [snappy replacement package](https://github.com/klauspost/compress/tree/master/snappy). - * zstd: Fix incorrect encoding in "best" mode [#415](https://github.com/klauspost/compress/pull/415) - -* Aug 3, 2021 (v1.13.3) - * zstd: Improve Best compression [#404](https://github.com/klauspost/compress/pull/404) - * zstd: Fix WriteTo error forwarding [#411](https://github.com/klauspost/compress/pull/411) - * gzhttp: Return http.HandlerFunc instead of http.Handler. Unlikely breaking change. [#406](https://github.com/klauspost/compress/pull/406) - * s2sx: Fix max size error [#399](https://github.com/klauspost/compress/pull/399) - * zstd: Add optional stream content size on reset [#401](https://github.com/klauspost/compress/pull/401) - * zstd: use SpeedBestCompression for level >= 10 [#410](https://github.com/klauspost/compress/pull/410) - -* Jun 14, 2021 (v1.13.1) - * s2: Add full Snappy output support [#396](https://github.com/klauspost/compress/pull/396) - * zstd: Add configurable [Decoder window](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithDecoderMaxWindow) size [#394](https://github.com/klauspost/compress/pull/394) - * gzhttp: Add header to skip compression [#389](https://github.com/klauspost/compress/pull/389) - * s2: Improve speed with bigger output margin [#395](https://github.com/klauspost/compress/pull/395) - -* Jun 3, 2021 (v1.13.0) - * Added [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp#gzip-handler) which allows wrapping HTTP servers and clients with GZIP compressors. - * zstd: Detect short invalid signatures [#382](https://github.com/klauspost/compress/pull/382) - * zstd: Spawn decoder goroutine only if needed. [#380](https://github.com/klauspost/compress/pull/380) -
- - -
- See changes to v1.12.x - -* May 25, 2021 (v1.12.3) - * deflate: Better/faster Huffman encoding [#374](https://github.com/klauspost/compress/pull/374) - * deflate: Allocate less for history. [#375](https://github.com/klauspost/compress/pull/375) - * zstd: Forward read errors [#373](https://github.com/klauspost/compress/pull/373) - -* Apr 27, 2021 (v1.12.2) - * zstd: Improve better/best compression [#360](https://github.com/klauspost/compress/pull/360) [#364](https://github.com/klauspost/compress/pull/364) [#365](https://github.com/klauspost/compress/pull/365) - * zstd: Add helpers to compress/decompress zstd inside zip files [#363](https://github.com/klauspost/compress/pull/363) - * deflate: Improve level 5+6 compression [#367](https://github.com/klauspost/compress/pull/367) - * s2: Improve better/best compression [#358](https://github.com/klauspost/compress/pull/358) [#359](https://github.com/klauspost/compress/pull/358) - * s2: Load after checking src limit on amd64. [#362](https://github.com/klauspost/compress/pull/362) - * s2sx: Limit max executable size [#368](https://github.com/klauspost/compress/pull/368) - -* Apr 14, 2021 (v1.12.1) - * snappy package removed. Upstream added as dependency. - * s2: Better compression in "best" mode [#353](https://github.com/klauspost/compress/pull/353) - * s2sx: Add stdin input and detect pre-compressed from signature [#352](https://github.com/klauspost/compress/pull/352) - * s2c/s2d: Add http as possible input [#348](https://github.com/klauspost/compress/pull/348) - * s2c/s2d/s2sx: Always truncate when writing files [#352](https://github.com/klauspost/compress/pull/352) - * zstd: Reduce memory usage further when using [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) [#346](https://github.com/klauspost/compress/pull/346) - * s2: Fix potential problem with amd64 assembly and profilers [#349](https://github.com/klauspost/compress/pull/349) -
- -
- See changes to v1.11.x - -* Mar 26, 2021 (v1.11.13) - * zstd: Big speedup on small dictionary encodes [#344](https://github.com/klauspost/compress/pull/344) [#345](https://github.com/klauspost/compress/pull/345) - * zstd: Add [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) encoder option [#336](https://github.com/klauspost/compress/pull/336) - * deflate: Improve entropy compression [#338](https://github.com/klauspost/compress/pull/338) - * s2: Clean up and minor performance improvement in best [#341](https://github.com/klauspost/compress/pull/341) - -* Mar 5, 2021 (v1.11.12) - * s2: Add `s2sx` binary that creates [self extracting archives](https://github.com/klauspost/compress/tree/master/s2#s2sx-self-extracting-archives). - * s2: Speed up decompression on non-assembly platforms [#328](https://github.com/klauspost/compress/pull/328) - -* Mar 1, 2021 (v1.11.9) - * s2: Add ARM64 decompression assembly. Around 2x output speed. [#324](https://github.com/klauspost/compress/pull/324) - * s2: Improve "better" speed and efficiency. [#325](https://github.com/klauspost/compress/pull/325) - * s2: Fix binaries. - -* Feb 25, 2021 (v1.11.8) - * s2: Fixed occasional out-of-bounds write on amd64. Upgrade recommended. - * s2: Add AMD64 assembly for better mode. 25-50% faster. [#315](https://github.com/klauspost/compress/pull/315) - * s2: Less upfront decoder allocation. [#322](https://github.com/klauspost/compress/pull/322) - * zstd: Faster "compression" of incompressible data. [#314](https://github.com/klauspost/compress/pull/314) - * zip: Fix zip64 headers. [#313](https://github.com/klauspost/compress/pull/313) - -* Jan 14, 2021 (v1.11.7) - * Use Bytes() interface to get bytes across packages. [#309](https://github.com/klauspost/compress/pull/309) - * s2: Add 'best' compression option. [#310](https://github.com/klauspost/compress/pull/310) - * s2: Add ReaderMaxBlockSize, changes `s2.NewReader` signature to include varargs. [#311](https://github.com/klauspost/compress/pull/311) - * s2: Fix crash on small better buffers. [#308](https://github.com/klauspost/compress/pull/308) - * s2: Clean up decoder. [#312](https://github.com/klauspost/compress/pull/312) - -* Jan 7, 2021 (v1.11.6) - * zstd: Make decoder allocations smaller [#306](https://github.com/klauspost/compress/pull/306) - * zstd: Free Decoder resources when Reset is called with a nil io.Reader [#305](https://github.com/klauspost/compress/pull/305) - -* Dec 20, 2020 (v1.11.4) - * zstd: Add Best compression mode [#304](https://github.com/klauspost/compress/pull/304) - * Add header decoder [#299](https://github.com/klauspost/compress/pull/299) - * s2: Add uncompressed stream option [#297](https://github.com/klauspost/compress/pull/297) - * Simplify/speed up small blocks with known max size. [#300](https://github.com/klauspost/compress/pull/300) - * zstd: Always reset literal dict encoder [#303](https://github.com/klauspost/compress/pull/303) - -* Nov 15, 2020 (v1.11.3) - * inflate: 10-15% faster decompression [#293](https://github.com/klauspost/compress/pull/293) - * zstd: Tweak DecodeAll default allocation [#295](https://github.com/klauspost/compress/pull/295) - -* Oct 11, 2020 (v1.11.2) - * s2: Fix out of bounds read in "better" block compression [#291](https://github.com/klauspost/compress/pull/291) - -* Oct 1, 2020 (v1.11.1) - * zstd: Set allLitEntropy true in default configuration [#286](https://github.com/klauspost/compress/pull/286) - -* Sept 8, 2020 (v1.11.0) - * zstd: Add experimental compression [dictionaries](https://github.com/klauspost/compress/tree/master/zstd#dictionaries) [#281](https://github.com/klauspost/compress/pull/281) - * zstd: Fix mixed Write and ReadFrom calls [#282](https://github.com/klauspost/compress/pull/282) - * inflate/gz: Limit variable shifts, ~5% faster decompression [#274](https://github.com/klauspost/compress/pull/274) -
- -
- See changes to v1.10.x - -* July 8, 2020 (v1.10.11) - * zstd: Fix extra block when compressing with ReadFrom. [#278](https://github.com/klauspost/compress/pull/278) - * huff0: Also populate compression table when reading decoding table. [#275](https://github.com/klauspost/compress/pull/275) - -* June 23, 2020 (v1.10.10) - * zstd: Skip entropy compression in fastest mode when no matches. [#270](https://github.com/klauspost/compress/pull/270) - -* June 16, 2020 (v1.10.9): - * zstd: API change for specifying dictionaries. See [#268](https://github.com/klauspost/compress/pull/268) - * zip: update CreateHeaderRaw to handle zip64 fields. [#266](https://github.com/klauspost/compress/pull/266) - * Fuzzit tests removed. The service has been purchased and is no longer available. - -* June 5, 2020 (v1.10.8): - * 1.15x faster zstd block decompression. [#265](https://github.com/klauspost/compress/pull/265) - -* June 1, 2020 (v1.10.7): - * Added zstd decompression [dictionary support](https://github.com/klauspost/compress/tree/master/zstd#dictionaries) - * Increase zstd decompression speed up to 1.19x. [#259](https://github.com/klauspost/compress/pull/259) - * Remove internal reset call in zstd compression and reduce allocations. [#263](https://github.com/klauspost/compress/pull/263) - -* May 21, 2020: (v1.10.6) - * zstd: Reduce allocations while decoding. [#258](https://github.com/klauspost/compress/pull/258), [#252](https://github.com/klauspost/compress/pull/252) - * zstd: Stricter decompression checks. - -* April 12, 2020: (v1.10.5) - * s2-commands: Flush output when receiving SIGINT. [#239](https://github.com/klauspost/compress/pull/239) - -* Apr 8, 2020: (v1.10.4) - * zstd: Minor/special case optimizations. [#251](https://github.com/klauspost/compress/pull/251), [#250](https://github.com/klauspost/compress/pull/250), [#249](https://github.com/klauspost/compress/pull/249), [#247](https://github.com/klauspost/compress/pull/247) -* Mar 11, 2020: (v1.10.3) - * s2: Use S2 encoder in pure Go mode for Snappy output as well. [#245](https://github.com/klauspost/compress/pull/245) - * s2: Fix pure Go block encoder. [#244](https://github.com/klauspost/compress/pull/244) - * zstd: Added "better compression" mode. [#240](https://github.com/klauspost/compress/pull/240) - * zstd: Improve speed of fastest compression mode by 5-10% [#241](https://github.com/klauspost/compress/pull/241) - * zstd: Skip creating encoders when not needed. [#238](https://github.com/klauspost/compress/pull/238) - -* Feb 27, 2020: (v1.10.2) - * Close to 50% speedup in inflate (gzip/zip decompression). [#236](https://github.com/klauspost/compress/pull/236) [#234](https://github.com/klauspost/compress/pull/234) [#232](https://github.com/klauspost/compress/pull/232) - * Reduce deflate level 1-6 memory usage up to 59%. [#227](https://github.com/klauspost/compress/pull/227) - -* Feb 18, 2020: (v1.10.1) - * Fix zstd crash when resetting multiple times without sending data. [#226](https://github.com/klauspost/compress/pull/226) - * deflate: Fix dictionary use on level 1-6. [#224](https://github.com/klauspost/compress/pull/224) - * Remove deflate writer reference when closing. [#224](https://github.com/klauspost/compress/pull/224) - -* Feb 4, 2020: (v1.10.0) - * Add optional dictionary to [stateless deflate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc#StatelessDeflate). Breaking change, send `nil` for previous behaviour. [#216](https://github.com/klauspost/compress/pull/216) - * Fix buffer overflow on repeated small block deflate. [#218](https://github.com/klauspost/compress/pull/218) - * Allow copying content from an existing ZIP file without decompressing+compressing. [#214](https://github.com/klauspost/compress/pull/214) - * Added [S2](https://github.com/klauspost/compress/tree/master/s2#s2-compression) AMD64 assembler and various optimizations. Stream speed >10GB/s. [#186](https://github.com/klauspost/compress/pull/186) - -
- -
- See changes prior to v1.10.0 - -* Jan 20,2020 (v1.9.8) Optimize gzip/deflate with better size estimates and faster table generation. [#207](https://github.com/klauspost/compress/pull/207) by [luyu6056](https://github.com/luyu6056), [#206](https://github.com/klauspost/compress/pull/206). -* Jan 11, 2020: S2 Encode/Decode will use provided buffer if capacity is big enough. [#204](https://github.com/klauspost/compress/pull/204) -* Jan 5, 2020: (v1.9.7) Fix another zstd regression in v1.9.5 - v1.9.6 removed. -* Jan 4, 2020: (v1.9.6) Regression in v1.9.5 fixed causing corrupt zstd encodes in rare cases. -* Jan 4, 2020: Faster IO in [s2c + s2d commandline tools](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) compression/decompression. [#192](https://github.com/klauspost/compress/pull/192) -* Dec 29, 2019: Removed v1.9.5 since fuzz tests showed a compatibility problem with the reference zstandard decoder. -* Dec 29, 2019: (v1.9.5) zstd: 10-20% faster block compression. [#199](https://github.com/klauspost/compress/pull/199) -* Dec 29, 2019: [zip](https://godoc.org/github.com/klauspost/compress/zip) package updated with latest Go features -* Dec 29, 2019: zstd: Single segment flag condintions tweaked. [#197](https://github.com/klauspost/compress/pull/197) -* Dec 18, 2019: s2: Faster compression when ReadFrom is used. [#198](https://github.com/klauspost/compress/pull/198) -* Dec 10, 2019: s2: Fix repeat length output when just above at 16MB limit. -* Dec 10, 2019: zstd: Add function to get decoder as io.ReadCloser. [#191](https://github.com/klauspost/compress/pull/191) -* Dec 3, 2019: (v1.9.4) S2: limit max repeat length. [#188](https://github.com/klauspost/compress/pull/188) -* Dec 3, 2019: Add [WithNoEntropyCompression](https://godoc.org/github.com/klauspost/compress/zstd#WithNoEntropyCompression) to zstd [#187](https://github.com/klauspost/compress/pull/187) -* Dec 3, 2019: Reduce memory use for tests. Check for leaked goroutines. -* Nov 28, 2019 (v1.9.3) Less allocations in stateless deflate. -* Nov 28, 2019: 5-20% Faster huff0 decode. Impacts zstd as well. [#184](https://github.com/klauspost/compress/pull/184) -* Nov 12, 2019 (v1.9.2) Added [Stateless Compression](#stateless-compression) for gzip/deflate. -* Nov 12, 2019: Fixed zstd decompression of large single blocks. [#180](https://github.com/klauspost/compress/pull/180) -* Nov 11, 2019: Set default [s2c](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) block size to 4MB. -* Nov 11, 2019: Reduce inflate memory use by 1KB. -* Nov 10, 2019: Less allocations in deflate bit writer. -* Nov 10, 2019: Fix inconsistent error returned by zstd decoder. -* Oct 28, 2019 (v1.9.1) ztsd: Fix crash when compressing blocks. [#174](https://github.com/klauspost/compress/pull/174) -* Oct 24, 2019 (v1.9.0) zstd: Fix rare data corruption [#173](https://github.com/klauspost/compress/pull/173) -* Oct 24, 2019 zstd: Fix huff0 out of buffer write [#171](https://github.com/klauspost/compress/pull/171) and always return errors [#172](https://github.com/klauspost/compress/pull/172) -* Oct 10, 2019: Big deflate rewrite, 30-40% faster with better compression [#105](https://github.com/klauspost/compress/pull/105) - -
- -
- See changes prior to v1.9.0 - -* Oct 10, 2019: (v1.8.6) zstd: Allow partial reads to get flushed data. [#169](https://github.com/klauspost/compress/pull/169) -* Oct 3, 2019: Fix inconsistent results on broken zstd streams. -* Sep 25, 2019: Added `-rm` (remove source files) and `-q` (no output except errors) to `s2c` and `s2d` [commands](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) -* Sep 16, 2019: (v1.8.4) Add `s2c` and `s2d` [commandline tools](https://github.com/klauspost/compress/tree/master/s2#commandline-tools). -* Sep 10, 2019: (v1.8.3) Fix s2 decoder [Skip](https://godoc.org/github.com/klauspost/compress/s2#Reader.Skip). -* Sep 7, 2019: zstd: Added [WithWindowSize](https://godoc.org/github.com/klauspost/compress/zstd#WithWindowSize), contributed by [ianwilkes](https://github.com/ianwilkes). -* Sep 5, 2019: (v1.8.2) Add [WithZeroFrames](https://godoc.org/github.com/klauspost/compress/zstd#WithZeroFrames) which adds full zero payload block encoding option. -* Sep 5, 2019: Lazy initialization of zstandard predefined en/decoder tables. -* Aug 26, 2019: (v1.8.1) S2: 1-2% compression increase in "better" compression mode. -* Aug 26, 2019: zstd: Check maximum size of Huffman 1X compressed literals while decoding. -* Aug 24, 2019: (v1.8.0) Added [S2 compression](https://github.com/klauspost/compress/tree/master/s2#s2-compression), a high performance replacement for Snappy. -* Aug 21, 2019: (v1.7.6) Fixed minor issues found by fuzzer. One could lead to zstd not decompressing. -* Aug 18, 2019: Add [fuzzit](https://fuzzit.dev/) continuous fuzzing. -* Aug 14, 2019: zstd: Skip incompressible data 2x faster. [#147](https://github.com/klauspost/compress/pull/147) -* Aug 4, 2019 (v1.7.5): Better literal compression. [#146](https://github.com/klauspost/compress/pull/146) -* Aug 4, 2019: Faster zstd compression. [#143](https://github.com/klauspost/compress/pull/143) [#144](https://github.com/klauspost/compress/pull/144) -* Aug 4, 2019: Faster zstd decompression. [#145](https://github.com/klauspost/compress/pull/145) [#143](https://github.com/klauspost/compress/pull/143) [#142](https://github.com/klauspost/compress/pull/142) -* July 15, 2019 (v1.7.4): Fix double EOF block in rare cases on zstd encoder. -* July 15, 2019 (v1.7.3): Minor speedup/compression increase in default zstd encoder. -* July 14, 2019: zstd decoder: Fix decompression error on multiple uses with mixed content. -* July 7, 2019 (v1.7.2): Snappy update, zstd decoder potential race fix. -* June 17, 2019: zstd decompression bugfix. -* June 17, 2019: fix 32 bit builds. -* June 17, 2019: Easier use in modules (less dependencies). -* June 9, 2019: New stronger "default" [zstd](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression mode. Matches zstd default compression ratio. -* June 5, 2019: 20-40% throughput in [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and better compression. -* June 5, 2019: deflate/gzip compression: Reduce memory usage of lower compression levels. -* June 2, 2019: Added [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression! -* May 25, 2019: deflate/gzip: 10% faster bit writer, mostly visible in lower levels. -* Apr 22, 2019: [zstd](https://github.com/klauspost/compress/tree/master/zstd#zstd) decompression added. -* Aug 1, 2018: Added [huff0 README](https://github.com/klauspost/compress/tree/master/huff0#huff0-entropy-compression). -* Jul 8, 2018: Added [Performance Update 2018](#performance-update-2018) below. -* Jun 23, 2018: Merged [Go 1.11 inflate optimizations](https://go-review.googlesource.com/c/go/+/102235). Go 1.9 is now required. Backwards compatible version tagged with [v1.3.0](https://github.com/klauspost/compress/releases/tag/v1.3.0). -* Apr 2, 2018: Added [huff0](https://godoc.org/github.com/klauspost/compress/huff0) en/decoder. Experimental for now, API may change. -* Mar 4, 2018: Added [FSE Entropy](https://godoc.org/github.com/klauspost/compress/fse) en/decoder. Experimental for now, API may change. -* Nov 3, 2017: Add compression [Estimate](https://godoc.org/github.com/klauspost/compress#Estimate) function. -* May 28, 2017: Reduce allocations when resetting decoder. -* Apr 02, 2017: Change back to official crc32, since changes were merged in Go 1.7. -* Jan 14, 2017: Reduce stack pressure due to array copies. See [Issue #18625](https://github.com/golang/go/issues/18625). -* Oct 25, 2016: Level 2-4 have been rewritten and now offers significantly better performance than before. -* Oct 20, 2016: Port zlib changes from Go 1.7 to fix zlib writer issue. Please update. -* Oct 16, 2016: Go 1.7 changes merged. Apples to apples this package is a few percent faster, but has a significantly better balance between speed and compression per level. -* Mar 24, 2016: Always attempt Huffman encoding on level 4-7. This improves base 64 encoded data compression. -* Mar 24, 2016: Small speedup for level 1-3. -* Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster. -* Feb 19, 2016: Handle small payloads faster in level 1-3. -* Feb 19, 2016: Added faster level 2 + 3 compression modes. -* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progression in terms of compression. New default level is 5. -* Feb 14, 2016: Snappy: Merge upstream changes. -* Feb 14, 2016: Snappy: Fix aggressive skipping. -* Feb 14, 2016: Snappy: Update benchmark. -* Feb 13, 2016: Deflate: Fixed assembler problem that could lead to sub-optimal compression. -* Feb 12, 2016: Snappy: Added AMD64 SSE 4.2 optimizations to matching, which makes easy to compress material run faster. Typical speedup is around 25%. -* Feb 9, 2016: Added Snappy package fork. This version is 5-7% faster, much more on hard to compress content. -* Jan 30, 2016: Optimize level 1 to 3 by not considering static dictionary or storing uncompressed. ~4-5% speedup. -* Jan 16, 2016: Optimization on deflate level 1,2,3 compression. -* Jan 8 2016: Merge [CL 18317](https://go-review.googlesource.com/#/c/18317): fix reading, writing of zip64 archives. -* Dec 8 2015: Make level 1 and -2 deterministic even if write size differs. -* Dec 8 2015: Split encoding functions, so hashing and matching can potentially be inlined. 1-3% faster on AMD64. 5% faster on other platforms. -* Dec 8 2015: Fixed rare [one byte out-of bounds read](https://github.com/klauspost/compress/issues/20). Please update! -* Nov 23 2015: Optimization on token writer. ~2-4% faster. Contributed by [@dsnet](https://github.com/dsnet). -* Nov 20 2015: Small optimization to bit writer on 64 bit systems. -* Nov 17 2015: Fixed out-of-bound errors if the underlying Writer returned an error. See [#15](https://github.com/klauspost/compress/issues/15). -* Nov 12 2015: Added [io.WriterTo](https://golang.org/pkg/io/#WriterTo) support to gzip/inflate. -* Nov 11 2015: Merged [CL 16669](https://go-review.googlesource.com/#/c/16669/4): archive/zip: enable overriding (de)compressors per file -* Oct 15 2015: Added skipping on uncompressible data. Random data speed up >5x. - -
- -# deflate usage - -The packages are drop-in replacements for standard libraries. Simply replace the import path to use them: - -Typical speed is about 2x of the standard library packages. - -| old import | new import | Documentation | -|------------------|---------------------------------------|-------------------------------------------------------------------------| -| `compress/gzip` | `github.com/klauspost/compress/gzip` | [gzip](https://pkg.go.dev/github.com/klauspost/compress/gzip?tab=doc) | -| `compress/zlib` | `github.com/klauspost/compress/zlib` | [zlib](https://pkg.go.dev/github.com/klauspost/compress/zlib?tab=doc) | -| `archive/zip` | `github.com/klauspost/compress/zip` | [zip](https://pkg.go.dev/github.com/klauspost/compress/zip?tab=doc) | -| `compress/flate` | `github.com/klauspost/compress/flate` | [flate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc) | - -* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a dropin replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib). - -You may also be interested in [pgzip](https://github.com/klauspost/pgzip), which is a drop in replacement for gzip, which support multithreaded compression on big files and the optimized [crc32](https://github.com/klauspost/crc32) package used by these packages. - -The packages contains the same as the standard library, so you can use the godoc for that: [gzip](http://golang.org/pkg/compress/gzip/), [zip](http://golang.org/pkg/archive/zip/), [zlib](http://golang.org/pkg/compress/zlib/), [flate](http://golang.org/pkg/compress/flate/). - -Currently there is only minor speedup on decompression (mostly CRC32 calculation). - -Memory usage is typically 1MB for a Writer. stdlib is in the same range. -If you expect to have a lot of concurrently allocated Writers consider using -the stateless compress described below. - -For compression performance, see: [this spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing). - -To disable all assembly add `-tags=noasm`. This works across all packages. - -# Stateless compression - -This package offers stateless compression as a special option for gzip/deflate. -It will do compression but without maintaining any state between Write calls. - -This means there will be no memory kept between Write calls, but compression and speed will be suboptimal. - -This is only relevant in cases where you expect to run many thousands of compressors concurrently, -but with very little activity. This is *not* intended for regular web servers serving individual requests. - -Because of this, the size of actual Write calls will affect output size. - -In gzip, specify level `-3` / `gzip.StatelessCompression` to enable. - -For direct deflate use, NewStatelessWriter and StatelessDeflate are available. See [documentation](https://godoc.org/github.com/klauspost/compress/flate#NewStatelessWriter) - -A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer: - -```go - // replace 'ioutil.Discard' with your output. - gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression) - if err != nil { - return err - } - defer gzw.Close() - - w := bufio.NewWriterSize(gzw, 4096) - defer w.Flush() - - // Write to 'w' -``` - -This will only use up to 4KB in memory when the writer is idle. - -Compression is almost always worse than the fastest compression level -and each write will allocate (a little) memory. - - -# Other packages - -Here are other packages of good quality and pure Go (no cgo wrappers or autoconverted code): - -* [github.com/pierrec/lz4](https://github.com/pierrec/lz4) - strong multithreaded LZ4 compression. -* [github.com/cosnicolaou/pbzip2](https://github.com/cosnicolaou/pbzip2) - multithreaded bzip2 decompression. -* [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer. -* [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression. -* [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression. -* [github.com/minio/zipindex](https://github.com/minio/zipindex) - External ZIP directory index. -* [github.com/ybirader/pzip](https://github.com/ybirader/pzip) - Fast concurrent zip archiver and extractor. - -# license - -This code is licensed under the same conditions as the original Go code. See LICENSE file. diff --git a/vendor/github.com/klauspost/compress/SECURITY.md b/vendor/github.com/klauspost/compress/SECURITY.md deleted file mode 100644 index ca6685e2b..000000000 --- a/vendor/github.com/klauspost/compress/SECURITY.md +++ /dev/null @@ -1,25 +0,0 @@ -# Security Policy - -## Supported Versions - -Security updates are applied only to the latest release. - -## Vulnerability Definition - -A security vulnerability is a bug that with certain input triggers a crash or an infinite loop. Most calls will have varying execution time and only in rare cases will slow operation be considered a security vulnerability. - -Corrupted output generally is not considered a security vulnerability, unless independent operations are able to affect each other. Note that not all functionality is re-entrant and safe to use concurrently. - -Out-of-memory crashes only applies if the en/decoder uses an abnormal amount of memory, with appropriate options applied, to limit maximum window size, concurrency, etc. However, if you are in doubt you are welcome to file a security issue. - -It is assumed that all callers are trusted, meaning internal data exposed through reflection or inspection of returned data structures is not considered a vulnerability. - -Vulnerabilities resulting from compiler/assembler errors should be reported upstream. Depending on the severity this package may or may not implement a workaround. - -## Reporting a Vulnerability - -If you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released. - -Please disclose it at [security advisory](https://github.com/klauspost/compress/security/advisories/new). If possible please provide a minimal reproducer. If the issue only applies to a single platform, it would be helpful to provide access to that. - -This project is maintained by a team of volunteers on a reasonable-effort basis. As such, vulnerabilities will be disclosed in a best effort base. diff --git a/vendor/github.com/klauspost/compress/compressible.go b/vendor/github.com/klauspost/compress/compressible.go deleted file mode 100644 index ea5a692d5..000000000 --- a/vendor/github.com/klauspost/compress/compressible.go +++ /dev/null @@ -1,85 +0,0 @@ -package compress - -import "math" - -// Estimate returns a normalized compressibility estimate of block b. -// Values close to zero are likely uncompressible. -// Values above 0.1 are likely to be compressible. -// Values above 0.5 are very compressible. -// Very small lengths will return 0. -func Estimate(b []byte) float64 { - if len(b) < 16 { - return 0 - } - - // Correctly predicted order 1 - hits := 0 - lastMatch := false - var o1 [256]byte - var hist [256]int - c1 := byte(0) - for _, c := range b { - if c == o1[c1] { - // We only count a hit if there was two correct predictions in a row. - if lastMatch { - hits++ - } - lastMatch = true - } else { - lastMatch = false - } - o1[c1] = c - c1 = c - hist[c]++ - } - - // Use x^0.6 to give better spread - prediction := math.Pow(float64(hits)/float64(len(b)), 0.6) - - // Calculate histogram distribution - variance := float64(0) - avg := float64(len(b)) / 256 - - for _, v := range hist { - Δ := float64(v) - avg - variance += Δ * Δ - } - - stddev := math.Sqrt(float64(variance)) / float64(len(b)) - exp := math.Sqrt(1 / float64(len(b))) - - // Subtract expected stddev - stddev -= exp - if stddev < 0 { - stddev = 0 - } - stddev *= 1 + exp - - // Use x^0.4 to give better spread - entropy := math.Pow(stddev, 0.4) - - // 50/50 weight between prediction and histogram distribution - return math.Pow((prediction+entropy)/2, 0.9) -} - -// ShannonEntropyBits returns the number of bits minimum required to represent -// an entropy encoding of the input bytes. -// https://en.wiktionary.org/wiki/Shannon_entropy -func ShannonEntropyBits(b []byte) int { - if len(b) == 0 { - return 0 - } - var hist [256]int - for _, c := range b { - hist[c]++ - } - shannon := float64(0) - invTotal := 1.0 / float64(len(b)) - for _, v := range hist[:] { - if v > 0 { - n := float64(v) - shannon += math.Ceil(-math.Log2(n*invTotal) * n) - } - } - return int(math.Ceil(shannon)) -} diff --git a/vendor/github.com/klauspost/compress/fse/README.md b/vendor/github.com/klauspost/compress/fse/README.md deleted file mode 100644 index ea7324da6..000000000 --- a/vendor/github.com/klauspost/compress/fse/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# Finite State Entropy - -This package provides Finite State Entropy encoding and decoding. - -Finite State Entropy (also referenced as [tANS](https://en.wikipedia.org/wiki/Asymmetric_numeral_systems#tANS)) -encoding provides a fast near-optimal symbol encoding/decoding -for byte blocks as implemented in [zstandard](https://github.com/facebook/zstd). - -This can be used for compressing input with a lot of similar input values to the smallest number of bytes. -This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders, -but it can be used as a secondary step to compressors (like Snappy) that does not do entropy encoding. - -* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/fse) - -## News - - * Feb 2018: First implementation released. Consider this beta software for now. - -# Usage - -This package provides a low level interface that allows to compress single independent blocks. - -Each block is separate, and there is no built in integrity checks. -This means that the caller should keep track of block sizes and also do checksums if needed. - -Compressing a block is done via the [`Compress`](https://godoc.org/github.com/klauspost/compress/fse#Compress) function. -You must provide input and will receive the output and maybe an error. - -These error values can be returned: - -| Error | Description | -|---------------------|-----------------------------------------------------------------------------| -| `` | Everything ok, output is returned | -| `ErrIncompressible` | Returned when input is judged to be too hard to compress | -| `ErrUseRLE` | Returned from the compressor when the input is a single byte value repeated | -| `(error)` | An internal error occurred. | - -As can be seen above there are errors that will be returned even under normal operation so it is important to handle these. - -To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/fse#Scratch) object -that can be re-used for successive calls. Both compression and decompression accepts a `Scratch` object, and the same -object can be used for both. - -Be aware, that when re-using a `Scratch` object that the *output* buffer is also re-used, so if you are still using this -you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output. - -Decompressing is done by calling the [`Decompress`](https://godoc.org/github.com/klauspost/compress/fse#Decompress) function. -You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back -your input was likely corrupted. - -It is important to note that a successful decoding does *not* mean your output matches your original input. -There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid. - -For more detailed usage, see examples in the [godoc documentation](https://godoc.org/github.com/klauspost/compress/fse#pkg-examples). - -# Performance - -A lot of factors are affecting speed. Block sizes and compressibility of the material are primary factors. -All compression functions are currently only running on the calling goroutine so only one core will be used per block. - -The compressor is significantly faster if symbols are kept as small as possible. The highest byte value of the input -is used to reduce some of the processing, so if all your input is above byte value 64 for instance, it may be -beneficial to transpose all your input values down by 64. - -With moderate block sizes around 64k speed are typically 200MB/s per core for compression and -around 300MB/s decompression speed. - -The same hardware typically does Huffman (deflate) encoding at 125MB/s and decompression at 100MB/s. - -# Plans - -At one point, more internals will be exposed to facilitate more "expert" usage of the components. - -A streaming interface is also likely to be implemented. Likely compatible with [FSE stream format](https://github.com/Cyan4973/FiniteStateEntropy/blob/dev/programs/fileio.c#L261). - -# Contributing - -Contributions are always welcome. Be aware that adding public functions will require good justification and breaking -changes will likely not be accepted. If in doubt open an issue before writing the PR. \ No newline at end of file diff --git a/vendor/github.com/klauspost/compress/fse/bitreader.go b/vendor/github.com/klauspost/compress/fse/bitreader.go deleted file mode 100644 index f65eb3909..000000000 --- a/vendor/github.com/klauspost/compress/fse/bitreader.go +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright 2018 Klaus Post. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. - -package fse - -import ( - "encoding/binary" - "errors" - "io" -) - -// bitReader reads a bitstream in reverse. -// The last set bit indicates the start of the stream and is used -// for aligning the input. -type bitReader struct { - in []byte - off uint // next byte to read is at in[off - 1] - value uint64 - bitsRead uint8 -} - -// init initializes and resets the bit reader. -func (b *bitReader) init(in []byte) error { - if len(in) < 1 { - return errors.New("corrupt stream: too short") - } - b.in = in - b.off = uint(len(in)) - // The highest bit of the last byte indicates where to start - v := in[len(in)-1] - if v == 0 { - return errors.New("corrupt stream, did not find end of stream") - } - b.bitsRead = 64 - b.value = 0 - if len(in) >= 8 { - b.fillFastStart() - } else { - b.fill() - b.fill() - } - b.bitsRead += 8 - uint8(highBits(uint32(v))) - return nil -} - -// getBits will return n bits. n can be 0. -func (b *bitReader) getBits(n uint8) uint16 { - if n == 0 || b.bitsRead >= 64 { - return 0 - } - return b.getBitsFast(n) -} - -// getBitsFast requires that at least one bit is requested every time. -// There are no checks if the buffer is filled. -func (b *bitReader) getBitsFast(n uint8) uint16 { - const regMask = 64 - 1 - v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) - b.bitsRead += n - return v -} - -// fillFast() will make sure at least 32 bits are available. -// There must be at least 4 bytes available. -func (b *bitReader) fillFast() { - if b.bitsRead < 32 { - return - } - // 2 bounds checks. - v := b.in[b.off-4:] - v = v[:4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - b.value = (b.value << 32) | uint64(low) - b.bitsRead -= 32 - b.off -= 4 -} - -// fill() will make sure at least 32 bits are available. -func (b *bitReader) fill() { - if b.bitsRead < 32 { - return - } - if b.off > 4 { - v := b.in[b.off-4:] - v = v[:4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - b.value = (b.value << 32) | uint64(low) - b.bitsRead -= 32 - b.off -= 4 - return - } - for b.off > 0 { - b.value = (b.value << 8) | uint64(b.in[b.off-1]) - b.bitsRead -= 8 - b.off-- - } -} - -// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. -func (b *bitReader) fillFastStart() { - // Do single re-slice to avoid bounds checks. - b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) - b.bitsRead = 0 - b.off -= 8 -} - -// finished returns true if all bits have been read from the bit stream. -func (b *bitReader) finished() bool { - return b.bitsRead >= 64 && b.off == 0 -} - -// close the bitstream and returns an error if out-of-buffer reads occurred. -func (b *bitReader) close() error { - // Release reference. - b.in = nil - if b.bitsRead > 64 { - return io.ErrUnexpectedEOF - } - return nil -} diff --git a/vendor/github.com/klauspost/compress/fse/bitwriter.go b/vendor/github.com/klauspost/compress/fse/bitwriter.go deleted file mode 100644 index e82fa3bb7..000000000 --- a/vendor/github.com/klauspost/compress/fse/bitwriter.go +++ /dev/null @@ -1,167 +0,0 @@ -// Copyright 2018 Klaus Post. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. - -package fse - -import "fmt" - -// bitWriter will write bits. -// First bit will be LSB of the first byte of output. -type bitWriter struct { - bitContainer uint64 - nBits uint8 - out []byte -} - -// bitMask16 is bitmasks. Has extra to avoid bounds check. -var bitMask16 = [32]uint16{ - 0, 1, 3, 7, 0xF, 0x1F, - 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, - 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF, - 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, - 0xFFFF, 0xFFFF} /* up to 16 bits */ - -// addBits16NC will add up to 16 bits. -// It will not check if there is space for them, -// so the caller must ensure that it has flushed recently. -func (b *bitWriter) addBits16NC(value uint16, bits uint8) { - b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63) - b.nBits += bits -} - -// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated. -// It will not check if there is space for them, so the caller must ensure that it has flushed recently. -func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { - b.bitContainer |= uint64(value) << (b.nBits & 63) - b.nBits += bits -} - -// addBits16ZeroNC will add up to 16 bits. -// It will not check if there is space for them, -// so the caller must ensure that it has flushed recently. -// This is fastest if bits can be zero. -func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) { - if bits == 0 { - return - } - value <<= (16 - bits) & 15 - value >>= (16 - bits) & 15 - b.bitContainer |= uint64(value) << (b.nBits & 63) - b.nBits += bits -} - -// flush will flush all pending full bytes. -// There will be at least 56 bits available for writing when this has been called. -// Using flush32 is faster, but leaves less space for writing. -func (b *bitWriter) flush() { - v := b.nBits >> 3 - switch v { - case 0: - case 1: - b.out = append(b.out, - byte(b.bitContainer), - ) - case 2: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - ) - case 3: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - ) - case 4: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - ) - case 5: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - ) - case 6: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - byte(b.bitContainer>>40), - ) - case 7: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - byte(b.bitContainer>>40), - byte(b.bitContainer>>48), - ) - case 8: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - byte(b.bitContainer>>40), - byte(b.bitContainer>>48), - byte(b.bitContainer>>56), - ) - default: - panic(fmt.Errorf("bits (%d) > 64", b.nBits)) - } - b.bitContainer >>= v << 3 - b.nBits &= 7 -} - -// flush32 will flush out, so there are at least 32 bits available for writing. -func (b *bitWriter) flush32() { - if b.nBits < 32 { - return - } - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24)) - b.nBits -= 32 - b.bitContainer >>= 32 -} - -// flushAlign will flush remaining full bytes and align to next byte boundary. -func (b *bitWriter) flushAlign() { - nbBytes := (b.nBits + 7) >> 3 - for i := uint8(0); i < nbBytes; i++ { - b.out = append(b.out, byte(b.bitContainer>>(i*8))) - } - b.nBits = 0 - b.bitContainer = 0 -} - -// close will write the alignment bit and write the final byte(s) -// to the output. -func (b *bitWriter) close() { - // End mark - b.addBits16Clean(1, 1) - // flush until next byte. - b.flushAlign() -} - -// reset and continue writing by appending to out. -func (b *bitWriter) reset(out []byte) { - b.bitContainer = 0 - b.nBits = 0 - b.out = out -} diff --git a/vendor/github.com/klauspost/compress/fse/bytereader.go b/vendor/github.com/klauspost/compress/fse/bytereader.go deleted file mode 100644 index abade2d60..000000000 --- a/vendor/github.com/klauspost/compress/fse/bytereader.go +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2018 Klaus Post. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. - -package fse - -// byteReader provides a byte reader that reads -// little endian values from a byte stream. -// The input stream is manually advanced. -// The reader performs no bounds checks. -type byteReader struct { - b []byte - off int -} - -// init will initialize the reader and set the input. -func (b *byteReader) init(in []byte) { - b.b = in - b.off = 0 -} - -// advance the stream b n bytes. -func (b *byteReader) advance(n uint) { - b.off += int(n) -} - -// Uint32 returns a little endian uint32 starting at current offset. -func (b byteReader) Uint32() uint32 { - b2 := b.b[b.off:] - b2 = b2[:4] - v3 := uint32(b2[3]) - v2 := uint32(b2[2]) - v1 := uint32(b2[1]) - v0 := uint32(b2[0]) - return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) -} - -// unread returns the unread portion of the input. -func (b byteReader) unread() []byte { - return b.b[b.off:] -} - -// remain will return the number of bytes remaining. -func (b byteReader) remain() int { - return len(b.b) - b.off -} diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go deleted file mode 100644 index 074018d8f..000000000 --- a/vendor/github.com/klauspost/compress/fse/compress.go +++ /dev/null @@ -1,683 +0,0 @@ -// Copyright 2018 Klaus Post. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. - -package fse - -import ( - "errors" - "fmt" -) - -// Compress the input bytes. Input must be < 2GB. -// Provide a Scratch buffer to avoid memory allocations. -// Note that the output is also kept in the scratch buffer. -// If input is too hard to compress, ErrIncompressible is returned. -// If input is a single byte value repeated ErrUseRLE is returned. -func Compress(in []byte, s *Scratch) ([]byte, error) { - if len(in) <= 1 { - return nil, ErrIncompressible - } - if len(in) > (2<<30)-1 { - return nil, errors.New("input too big, must be < 2GB") - } - s, err := s.prepare(in) - if err != nil { - return nil, err - } - - // Create histogram, if none was provided. - maxCount := s.maxCount - if maxCount == 0 { - maxCount = s.countSimple(in) - } - // Reset for next run. - s.clearCount = true - s.maxCount = 0 - if maxCount == len(in) { - // One symbol, use RLE - return nil, ErrUseRLE - } - if maxCount == 1 || maxCount < (len(in)>>7) { - // Each symbol present maximum once or too well distributed. - return nil, ErrIncompressible - } - s.optimalTableLog() - err = s.normalizeCount() - if err != nil { - return nil, err - } - err = s.writeCount() - if err != nil { - return nil, err - } - - if false { - err = s.validateNorm() - if err != nil { - return nil, err - } - } - - err = s.buildCTable() - if err != nil { - return nil, err - } - err = s.compress(in) - if err != nil { - return nil, err - } - s.Out = s.bw.out - // Check if we compressed. - if len(s.Out) >= len(in) { - return nil, ErrIncompressible - } - return s.Out, nil -} - -// cState contains the compression state of a stream. -type cState struct { - bw *bitWriter - stateTable []uint16 - state uint16 -} - -// init will initialize the compression state to the first symbol of the stream. -func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTransform) { - c.bw = bw - c.stateTable = ct.stateTable - - nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16 - im := int32((nbBitsOut << 16) - first.deltaNbBits) - lu := (im >> nbBitsOut) + first.deltaFindState - c.state = c.stateTable[lu] -} - -// encode the output symbol provided and write it to the bitstream. -func (c *cState) encode(symbolTT symbolTransform) { - nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 - dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState - c.bw.addBits16NC(c.state, uint8(nbBitsOut)) - c.state = c.stateTable[dstState] -} - -// encode the output symbol provided and write it to the bitstream. -func (c *cState) encodeZero(symbolTT symbolTransform) { - nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 - dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState - c.bw.addBits16ZeroNC(c.state, uint8(nbBitsOut)) - c.state = c.stateTable[dstState] -} - -// flush will write the tablelog to the output and flush the remaining full bytes. -func (c *cState) flush(tableLog uint8) { - c.bw.flush32() - c.bw.addBits16NC(c.state, tableLog) - c.bw.flush() -} - -// compress is the main compression loop that will encode the input from the last byte to the first. -func (s *Scratch) compress(src []byte) error { - if len(src) <= 2 { - return errors.New("compress: src too small") - } - tt := s.ct.symbolTT[:256] - s.bw.reset(s.Out) - - // Our two states each encodes every second byte. - // Last byte encoded (first byte decoded) will always be encoded by c1. - var c1, c2 cState - - // Encode so remaining size is divisible by 4. - ip := len(src) - if ip&1 == 1 { - c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]]) - c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]]) - c1.encodeZero(tt[src[ip-3]]) - ip -= 3 - } else { - c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]]) - c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]]) - ip -= 2 - } - if ip&2 != 0 { - c2.encodeZero(tt[src[ip-1]]) - c1.encodeZero(tt[src[ip-2]]) - ip -= 2 - } - src = src[:ip] - - // Main compression loop. - switch { - case !s.zeroBits && s.actualTableLog <= 8: - // We can encode 4 symbols without requiring a flush. - // We do not need to check if any output is 0 bits. - for ; len(src) >= 4; src = src[:len(src)-4] { - s.bw.flush32() - v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] - c2.encode(tt[v0]) - c1.encode(tt[v1]) - c2.encode(tt[v2]) - c1.encode(tt[v3]) - } - case !s.zeroBits: - // We do not need to check if any output is 0 bits. - for ; len(src) >= 4; src = src[:len(src)-4] { - s.bw.flush32() - v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] - c2.encode(tt[v0]) - c1.encode(tt[v1]) - s.bw.flush32() - c2.encode(tt[v2]) - c1.encode(tt[v3]) - } - case s.actualTableLog <= 8: - // We can encode 4 symbols without requiring a flush - for ; len(src) >= 4; src = src[:len(src)-4] { - s.bw.flush32() - v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] - c2.encodeZero(tt[v0]) - c1.encodeZero(tt[v1]) - c2.encodeZero(tt[v2]) - c1.encodeZero(tt[v3]) - } - default: - for ; len(src) >= 4; src = src[:len(src)-4] { - s.bw.flush32() - v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] - c2.encodeZero(tt[v0]) - c1.encodeZero(tt[v1]) - s.bw.flush32() - c2.encodeZero(tt[v2]) - c1.encodeZero(tt[v3]) - } - } - - // Flush final state. - // Used to initialize state when decoding. - c2.flush(s.actualTableLog) - c1.flush(s.actualTableLog) - - s.bw.close() - return nil -} - -// writeCount will write the normalized histogram count to header. -// This is read back by readNCount. -func (s *Scratch) writeCount() error { - var ( - tableLog = s.actualTableLog - tableSize = 1 << tableLog - previous0 bool - charnum uint16 - - maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3 - - // Write Table Size - bitStream = uint32(tableLog - minTablelog) - bitCount = uint(4) - remaining = int16(tableSize + 1) /* +1 for extra accuracy */ - threshold = int16(tableSize) - nbBits = uint(tableLog + 1) - ) - if cap(s.Out) < maxHeaderSize { - s.Out = make([]byte, 0, s.br.remain()+maxHeaderSize) - } - outP := uint(0) - out := s.Out[:maxHeaderSize] - - // stops at 1 - for remaining > 1 { - if previous0 { - start := charnum - for s.norm[charnum] == 0 { - charnum++ - } - for charnum >= start+24 { - start += 24 - bitStream += uint32(0xFFFF) << bitCount - out[outP] = byte(bitStream) - out[outP+1] = byte(bitStream >> 8) - outP += 2 - bitStream >>= 16 - } - for charnum >= start+3 { - start += 3 - bitStream += 3 << bitCount - bitCount += 2 - } - bitStream += uint32(charnum-start) << bitCount - bitCount += 2 - if bitCount > 16 { - out[outP] = byte(bitStream) - out[outP+1] = byte(bitStream >> 8) - outP += 2 - bitStream >>= 16 - bitCount -= 16 - } - } - - count := s.norm[charnum] - charnum++ - max := (2*threshold - 1) - remaining - if count < 0 { - remaining += count - } else { - remaining -= count - } - count++ // +1 for extra accuracy - if count >= threshold { - count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ - } - bitStream += uint32(count) << bitCount - bitCount += nbBits - if count < max { - bitCount-- - } - - previous0 = count == 1 - if remaining < 1 { - return errors.New("internal error: remaining<1") - } - for remaining < threshold { - nbBits-- - threshold >>= 1 - } - - if bitCount > 16 { - out[outP] = byte(bitStream) - out[outP+1] = byte(bitStream >> 8) - outP += 2 - bitStream >>= 16 - bitCount -= 16 - } - } - - out[outP] = byte(bitStream) - out[outP+1] = byte(bitStream >> 8) - outP += (bitCount + 7) / 8 - - if charnum > s.symbolLen { - return errors.New("internal error: charnum > s.symbolLen") - } - s.Out = out[:outP] - return nil -} - -// symbolTransform contains the state transform for a symbol. -type symbolTransform struct { - deltaFindState int32 - deltaNbBits uint32 -} - -// String prints values as a human readable string. -func (s symbolTransform) String() string { - return fmt.Sprintf("dnbits: %08x, fs:%d", s.deltaNbBits, s.deltaFindState) -} - -// cTable contains tables used for compression. -type cTable struct { - tableSymbol []byte - stateTable []uint16 - symbolTT []symbolTransform -} - -// allocCtable will allocate tables needed for compression. -// If existing tables a re big enough, they are simply re-used. -func (s *Scratch) allocCtable() { - tableSize := 1 << s.actualTableLog - // get tableSymbol that is big enough. - if cap(s.ct.tableSymbol) < tableSize { - s.ct.tableSymbol = make([]byte, tableSize) - } - s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] - - ctSize := tableSize - if cap(s.ct.stateTable) < ctSize { - s.ct.stateTable = make([]uint16, ctSize) - } - s.ct.stateTable = s.ct.stateTable[:ctSize] - - if cap(s.ct.symbolTT) < 256 { - s.ct.symbolTT = make([]symbolTransform, 256) - } - s.ct.symbolTT = s.ct.symbolTT[:256] -} - -// buildCTable will populate the compression table so it is ready to be used. -func (s *Scratch) buildCTable() error { - tableSize := uint32(1 << s.actualTableLog) - highThreshold := tableSize - 1 - var cumul [maxSymbolValue + 2]int16 - - s.allocCtable() - tableSymbol := s.ct.tableSymbol[:tableSize] - // symbol start positions - { - cumul[0] = 0 - for ui, v := range s.norm[:s.symbolLen-1] { - u := byte(ui) // one less than reference - if v == -1 { - // Low proba symbol - cumul[u+1] = cumul[u] + 1 - tableSymbol[highThreshold] = u - highThreshold-- - } else { - cumul[u+1] = cumul[u] + v - } - } - // Encode last symbol separately to avoid overflowing u - u := int(s.symbolLen - 1) - v := s.norm[s.symbolLen-1] - if v == -1 { - // Low proba symbol - cumul[u+1] = cumul[u] + 1 - tableSymbol[highThreshold] = byte(u) - highThreshold-- - } else { - cumul[u+1] = cumul[u] + v - } - if uint32(cumul[s.symbolLen]) != tableSize { - return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize) - } - cumul[s.symbolLen] = int16(tableSize) + 1 - } - // Spread symbols - s.zeroBits = false - { - step := tableStep(tableSize) - tableMask := tableSize - 1 - var position uint32 - // if any symbol > largeLimit, we may have 0 bits output. - largeLimit := int16(1 << (s.actualTableLog - 1)) - for ui, v := range s.norm[:s.symbolLen] { - symbol := byte(ui) - if v > largeLimit { - s.zeroBits = true - } - for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ { - tableSymbol[position] = symbol - position = (position + step) & tableMask - for position > highThreshold { - position = (position + step) & tableMask - } /* Low proba area */ - } - } - - // Check if we have gone through all positions - if position != 0 { - return errors.New("position!=0") - } - } - - // Build table - table := s.ct.stateTable - { - tsi := int(tableSize) - for u, v := range tableSymbol { - // TableU16 : sorted by symbol order; gives next state value - table[cumul[v]] = uint16(tsi + u) - cumul[v]++ - } - } - - // Build Symbol Transformation Table - { - total := int16(0) - symbolTT := s.ct.symbolTT[:s.symbolLen] - tableLog := s.actualTableLog - tl := (uint32(tableLog) << 16) - (1 << tableLog) - for i, v := range s.norm[:s.symbolLen] { - switch v { - case 0: - case -1, 1: - symbolTT[i].deltaNbBits = tl - symbolTT[i].deltaFindState = int32(total - 1) - total++ - default: - maxBitsOut := uint32(tableLog) - highBits(uint32(v-1)) - minStatePlus := uint32(v) << maxBitsOut - symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus - symbolTT[i].deltaFindState = int32(total - v) - total += v - } - } - if total != int16(tableSize) { - return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize) - } - } - return nil -} - -// countSimple will create a simple histogram in s.count. -// Returns the biggest count. -// Does not update s.clearCount. -func (s *Scratch) countSimple(in []byte) (max int) { - for _, v := range in { - s.count[v]++ - } - m, symlen := uint32(0), s.symbolLen - for i, v := range s.count[:] { - if v == 0 { - continue - } - if v > m { - m = v - } - symlen = uint16(i) + 1 - } - s.symbolLen = symlen - return int(m) -} - -// minTableLog provides the minimum logSize to safely represent a distribution. -func (s *Scratch) minTableLog() uint8 { - minBitsSrc := highBits(uint32(s.br.remain()-1)) + 1 - minBitsSymbols := highBits(uint32(s.symbolLen-1)) + 2 - if minBitsSrc < minBitsSymbols { - return uint8(minBitsSrc) - } - return uint8(minBitsSymbols) -} - -// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog -func (s *Scratch) optimalTableLog() { - tableLog := s.TableLog - minBits := s.minTableLog() - maxBitsSrc := uint8(highBits(uint32(s.br.remain()-1))) - 2 - if maxBitsSrc < tableLog { - // Accuracy can be reduced - tableLog = maxBitsSrc - } - if minBits > tableLog { - tableLog = minBits - } - // Need a minimum to safely represent all symbol values - if tableLog < minTablelog { - tableLog = minTablelog - } - if tableLog > maxTableLog { - tableLog = maxTableLog - } - s.actualTableLog = tableLog -} - -var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000} - -// normalizeCount will normalize the count of the symbols so -// the total is equal to the table size. -func (s *Scratch) normalizeCount() error { - var ( - tableLog = s.actualTableLog - scale = 62 - uint64(tableLog) - step = (1 << 62) / uint64(s.br.remain()) - vStep = uint64(1) << (scale - 20) - stillToDistribute = int16(1 << tableLog) - largest int - largestP int16 - lowThreshold = (uint32)(s.br.remain() >> tableLog) - ) - - for i, cnt := range s.count[:s.symbolLen] { - // already handled - // if (count[s] == s.length) return 0; /* rle special case */ - - if cnt == 0 { - s.norm[i] = 0 - continue - } - if cnt <= lowThreshold { - s.norm[i] = -1 - stillToDistribute-- - } else { - proba := (int16)((uint64(cnt) * step) >> scale) - if proba < 8 { - restToBeat := vStep * uint64(rtbTable[proba]) - v := uint64(cnt)*step - (uint64(proba) << scale) - if v > restToBeat { - proba++ - } - } - if proba > largestP { - largestP = proba - largest = i - } - s.norm[i] = proba - stillToDistribute -= proba - } - } - - if -stillToDistribute >= (s.norm[largest] >> 1) { - // corner case, need another normalization method - return s.normalizeCount2() - } - s.norm[largest] += stillToDistribute - return nil -} - -// Secondary normalization method. -// To be used when primary method fails. -func (s *Scratch) normalizeCount2() error { - const notYetAssigned = -2 - var ( - distributed uint32 - total = uint32(s.br.remain()) - tableLog = s.actualTableLog - lowThreshold = total >> tableLog - lowOne = (total * 3) >> (tableLog + 1) - ) - for i, cnt := range s.count[:s.symbolLen] { - if cnt == 0 { - s.norm[i] = 0 - continue - } - if cnt <= lowThreshold { - s.norm[i] = -1 - distributed++ - total -= cnt - continue - } - if cnt <= lowOne { - s.norm[i] = 1 - distributed++ - total -= cnt - continue - } - s.norm[i] = notYetAssigned - } - toDistribute := (1 << tableLog) - distributed - - if (total / toDistribute) > lowOne { - // risk of rounding to zero - lowOne = (total * 3) / (toDistribute * 2) - for i, cnt := range s.count[:s.symbolLen] { - if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { - s.norm[i] = 1 - distributed++ - total -= cnt - continue - } - } - toDistribute = (1 << tableLog) - distributed - } - if distributed == uint32(s.symbolLen)+1 { - // all values are pretty poor; - // probably incompressible data (should have already been detected); - // find max, then give all remaining points to max - var maxV int - var maxC uint32 - for i, cnt := range s.count[:s.symbolLen] { - if cnt > maxC { - maxV = i - maxC = cnt - } - } - s.norm[maxV] += int16(toDistribute) - return nil - } - - if total == 0 { - // all of the symbols were low enough for the lowOne or lowThreshold - for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) { - if s.norm[i] > 0 { - toDistribute-- - s.norm[i]++ - } - } - return nil - } - - var ( - vStepLog = 62 - uint64(tableLog) - mid = uint64((1 << (vStepLog - 1)) - 1) - rStep = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining - tmpTotal = mid - ) - for i, cnt := range s.count[:s.symbolLen] { - if s.norm[i] == notYetAssigned { - var ( - end = tmpTotal + uint64(cnt)*rStep - sStart = uint32(tmpTotal >> vStepLog) - sEnd = uint32(end >> vStepLog) - weight = sEnd - sStart - ) - if weight < 1 { - return errors.New("weight < 1") - } - s.norm[i] = int16(weight) - tmpTotal = end - } - } - return nil -} - -// validateNorm validates the normalized histogram table. -func (s *Scratch) validateNorm() (err error) { - var total int - for _, v := range s.norm[:s.symbolLen] { - if v >= 0 { - total += int(v) - } else { - total -= int(v) - } - } - defer func() { - if err == nil { - return - } - fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen) - for i, v := range s.norm[:s.symbolLen] { - fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v) - } - }() - if total != (1 << s.actualTableLog) { - return fmt.Errorf("warning: Total == %d != %d", total, 1< tablelogAbsoluteMax { - return errors.New("tableLog too large") - } - bitStream >>= 4 - bitCount := uint(4) - - s.actualTableLog = uint8(nbBits) - remaining := int32((1 << nbBits) + 1) - threshold := int32(1 << nbBits) - gotTotal := int32(0) - nbBits++ - - for remaining > 1 { - if previous0 { - n0 := charnum - for (bitStream & 0xFFFF) == 0xFFFF { - n0 += 24 - if b.off < iend-5 { - b.advance(2) - bitStream = b.Uint32() >> bitCount - } else { - bitStream >>= 16 - bitCount += 16 - } - } - for (bitStream & 3) == 3 { - n0 += 3 - bitStream >>= 2 - bitCount += 2 - } - n0 += uint16(bitStream & 3) - bitCount += 2 - if n0 > maxSymbolValue { - return errors.New("maxSymbolValue too small") - } - for charnum < n0 { - s.norm[charnum&0xff] = 0 - charnum++ - } - - if b.off <= iend-7 || b.off+int(bitCount>>3) <= iend-4 { - b.advance(bitCount >> 3) - bitCount &= 7 - bitStream = b.Uint32() >> bitCount - } else { - bitStream >>= 2 - } - } - - max := (2*(threshold) - 1) - (remaining) - var count int32 - - if (int32(bitStream) & (threshold - 1)) < max { - count = int32(bitStream) & (threshold - 1) - bitCount += nbBits - 1 - } else { - count = int32(bitStream) & (2*threshold - 1) - if count >= threshold { - count -= max - } - bitCount += nbBits - } - - count-- // extra accuracy - if count < 0 { - // -1 means +1 - remaining += count - gotTotal -= count - } else { - remaining -= count - gotTotal += count - } - s.norm[charnum&0xff] = int16(count) - charnum++ - previous0 = count == 0 - for remaining < threshold { - nbBits-- - threshold >>= 1 - } - if b.off <= iend-7 || b.off+int(bitCount>>3) <= iend-4 { - b.advance(bitCount >> 3) - bitCount &= 7 - } else { - bitCount -= (uint)(8 * (len(b.b) - 4 - b.off)) - b.off = len(b.b) - 4 - } - bitStream = b.Uint32() >> (bitCount & 31) - } - s.symbolLen = charnum - - if s.symbolLen <= 1 { - return fmt.Errorf("symbolLen (%d) too small", s.symbolLen) - } - if s.symbolLen > maxSymbolValue+1 { - return fmt.Errorf("symbolLen (%d) too big", s.symbolLen) - } - if remaining != 1 { - return fmt.Errorf("corruption detected (remaining %d != 1)", remaining) - } - if bitCount > 32 { - return fmt.Errorf("corruption detected (bitCount %d > 32)", bitCount) - } - if gotTotal != 1<> 3) - return nil -} - -// decSymbol contains information about a state entry, -// Including the state offset base, the output symbol and -// the number of bits to read for the low part of the destination state. -type decSymbol struct { - newState uint16 - symbol uint8 - nbBits uint8 -} - -// allocDtable will allocate decoding tables if they are not big enough. -func (s *Scratch) allocDtable() { - tableSize := 1 << s.actualTableLog - if cap(s.decTable) < tableSize { - s.decTable = make([]decSymbol, tableSize) - } - s.decTable = s.decTable[:tableSize] - - if cap(s.ct.tableSymbol) < 256 { - s.ct.tableSymbol = make([]byte, 256) - } - s.ct.tableSymbol = s.ct.tableSymbol[:256] - - if cap(s.ct.stateTable) < 256 { - s.ct.stateTable = make([]uint16, 256) - } - s.ct.stateTable = s.ct.stateTable[:256] -} - -// buildDtable will build the decoding table. -func (s *Scratch) buildDtable() error { - tableSize := uint32(1 << s.actualTableLog) - highThreshold := tableSize - 1 - s.allocDtable() - symbolNext := s.ct.stateTable[:256] - - // Init, lay down lowprob symbols - s.zeroBits = false - { - largeLimit := int16(1 << (s.actualTableLog - 1)) - for i, v := range s.norm[:s.symbolLen] { - if v == -1 { - s.decTable[highThreshold].symbol = uint8(i) - highThreshold-- - symbolNext[i] = 1 - } else { - if v >= largeLimit { - s.zeroBits = true - } - symbolNext[i] = uint16(v) - } - } - } - // Spread symbols - { - tableMask := tableSize - 1 - step := tableStep(tableSize) - position := uint32(0) - for ss, v := range s.norm[:s.symbolLen] { - for i := 0; i < int(v); i++ { - s.decTable[position].symbol = uint8(ss) - position = (position + step) & tableMask - for position > highThreshold { - // lowprob area - position = (position + step) & tableMask - } - } - } - if position != 0 { - // position must reach all cells once, otherwise normalizedCounter is incorrect - return errors.New("corrupted input (position != 0)") - } - } - - // Build Decoding table - { - tableSize := uint16(1 << s.actualTableLog) - for u, v := range s.decTable { - symbol := v.symbol - nextState := symbolNext[symbol] - symbolNext[symbol] = nextState + 1 - nBits := s.actualTableLog - byte(highBits(uint32(nextState))) - s.decTable[u].nbBits = nBits - newState := (nextState << nBits) - tableSize - if newState >= tableSize { - return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize) - } - if newState == uint16(u) && nBits == 0 { - // Seems weird that this is possible with nbits > 0. - return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u) - } - s.decTable[u].newState = newState - } - } - return nil -} - -// decompress will decompress the bitstream. -// If the buffer is over-read an error is returned. -func (s *Scratch) decompress() error { - br := &s.bits - if err := br.init(s.br.unread()); err != nil { - return err - } - - var s1, s2 decoder - // Initialize and decode first state and symbol. - s1.init(br, s.decTable, s.actualTableLog) - s2.init(br, s.decTable, s.actualTableLog) - - // Use temp table to avoid bound checks/append penalty. - var tmp = s.ct.tableSymbol[:256] - var off uint8 - - // Main part - if !s.zeroBits { - for br.off >= 8 { - br.fillFast() - tmp[off+0] = s1.nextFast() - tmp[off+1] = s2.nextFast() - br.fillFast() - tmp[off+2] = s1.nextFast() - tmp[off+3] = s2.nextFast() - off += 4 - // When off is 0, we have overflowed and should write. - if off == 0 { - s.Out = append(s.Out, tmp...) - if len(s.Out) >= s.DecompressLimit { - return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit) - } - } - } - } else { - for br.off >= 8 { - br.fillFast() - tmp[off+0] = s1.next() - tmp[off+1] = s2.next() - br.fillFast() - tmp[off+2] = s1.next() - tmp[off+3] = s2.next() - off += 4 - if off == 0 { - s.Out = append(s.Out, tmp...) - // When off is 0, we have overflowed and should write. - if len(s.Out) >= s.DecompressLimit { - return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit) - } - } - } - } - s.Out = append(s.Out, tmp[:off]...) - - // Final bits, a bit more expensive check - for { - if s1.finished() { - s.Out = append(s.Out, s1.final(), s2.final()) - break - } - br.fill() - s.Out = append(s.Out, s1.next()) - if s2.finished() { - s.Out = append(s.Out, s2.final(), s1.final()) - break - } - s.Out = append(s.Out, s2.next()) - if len(s.Out) >= s.DecompressLimit { - return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit) - } - } - return br.close() -} - -// decoder keeps track of the current state and updates it from the bitstream. -type decoder struct { - state uint16 - br *bitReader - dt []decSymbol -} - -// init will initialize the decoder and read the first state from the stream. -func (d *decoder) init(in *bitReader, dt []decSymbol, tableLog uint8) { - d.dt = dt - d.br = in - d.state = in.getBits(tableLog) -} - -// next returns the next symbol and sets the next state. -// At least tablelog bits must be available in the bit reader. -func (d *decoder) next() uint8 { - n := &d.dt[d.state] - lowBits := d.br.getBits(n.nbBits) - d.state = n.newState + lowBits - return n.symbol -} - -// finished returns true if all bits have been read from the bitstream -// and the next state would require reading bits from the input. -func (d *decoder) finished() bool { - return d.br.finished() && d.dt[d.state].nbBits > 0 -} - -// final returns the current state symbol without decoding the next. -func (d *decoder) final() uint8 { - return d.dt[d.state].symbol -} - -// nextFast returns the next symbol and sets the next state. -// This can only be used if no symbols are 0 bits. -// At least tablelog bits must be available in the bit reader. -func (d *decoder) nextFast() uint8 { - n := d.dt[d.state] - lowBits := d.br.getBitsFast(n.nbBits) - d.state = n.newState + lowBits - return n.symbol -} diff --git a/vendor/github.com/klauspost/compress/fse/fse.go b/vendor/github.com/klauspost/compress/fse/fse.go deleted file mode 100644 index 535cbadfd..000000000 --- a/vendor/github.com/klauspost/compress/fse/fse.go +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright 2018 Klaus Post. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. - -// Package fse provides Finite State Entropy encoding and decoding. -// -// Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding -// for byte blocks as implemented in zstd. -// -// See https://github.com/klauspost/compress/tree/master/fse for more information. -package fse - -import ( - "errors" - "fmt" - "math/bits" -) - -const ( - /*!MEMORY_USAGE : - * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) - * Increasing memory usage improves compression ratio - * Reduced memory usage can improve speed, due to cache effect - * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ - maxMemoryUsage = 14 - defaultMemoryUsage = 13 - - maxTableLog = maxMemoryUsage - 2 - maxTablesize = 1 << maxTableLog - defaultTablelog = defaultMemoryUsage - 2 - minTablelog = 5 - maxSymbolValue = 255 -) - -var ( - // ErrIncompressible is returned when input is judged to be too hard to compress. - ErrIncompressible = errors.New("input is not compressible") - - // ErrUseRLE is returned from the compressor when the input is a single byte value repeated. - ErrUseRLE = errors.New("input is single value repeated") -) - -// Scratch provides temporary storage for compression and decompression. -type Scratch struct { - // Private - count [maxSymbolValue + 1]uint32 - norm [maxSymbolValue + 1]int16 - br byteReader - bits bitReader - bw bitWriter - ct cTable // Compression tables. - decTable []decSymbol // Decompression table. - maxCount int // count of the most probable symbol - - // Per block parameters. - // These can be used to override compression parameters of the block. - // Do not touch, unless you know what you are doing. - - // Out is output buffer. - // If the scratch is re-used before the caller is done processing the output, - // set this field to nil. - // Otherwise the output buffer will be re-used for next Compression/Decompression step - // and allocation will be avoided. - Out []byte - - // DecompressLimit limits the maximum decoded size acceptable. - // If > 0 decompression will stop when approximately this many bytes - // has been decoded. - // If 0, maximum size will be 2GB. - DecompressLimit int - - symbolLen uint16 // Length of active part of the symbol table. - actualTableLog uint8 // Selected tablelog. - zeroBits bool // no bits has prob > 50%. - clearCount bool // clear count - - // MaxSymbolValue will override the maximum symbol value of the next block. - MaxSymbolValue uint8 - - // TableLog will attempt to override the tablelog for the next block. - TableLog uint8 -} - -// Histogram allows to populate the histogram and skip that step in the compression, -// It otherwise allows to inspect the histogram when compression is done. -// To indicate that you have populated the histogram call HistogramFinished -// with the value of the highest populated symbol, as well as the number of entries -// in the most populated entry. These are accepted at face value. -// The returned slice will always be length 256. -func (s *Scratch) Histogram() []uint32 { - return s.count[:] -} - -// HistogramFinished can be called to indicate that the histogram has been populated. -// maxSymbol is the index of the highest set symbol of the next data segment. -// maxCount is the number of entries in the most populated entry. -// These are accepted at face value. -func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) { - s.maxCount = maxCount - s.symbolLen = uint16(maxSymbol) + 1 - s.clearCount = maxCount != 0 -} - -// prepare will prepare and allocate scratch tables used for both compression and decompression. -func (s *Scratch) prepare(in []byte) (*Scratch, error) { - if s == nil { - s = &Scratch{} - } - if s.MaxSymbolValue == 0 { - s.MaxSymbolValue = 255 - } - if s.TableLog == 0 { - s.TableLog = defaultTablelog - } - if s.TableLog > maxTableLog { - return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog) - } - if cap(s.Out) == 0 { - s.Out = make([]byte, 0, len(in)) - } - if s.clearCount && s.maxCount == 0 { - for i := range s.count { - s.count[i] = 0 - } - s.clearCount = false - } - s.br.init(in) - if s.DecompressLimit == 0 { - // Max size 2GB. - s.DecompressLimit = (2 << 30) - 1 - } - - return s, nil -} - -// tableStep returns the next table index. -func tableStep(tableSize uint32) uint32 { - return (tableSize >> 1) + (tableSize >> 3) + 3 -} - -func highBits(val uint32) (n uint32) { - return uint32(bits.Len32(val) - 1) -} diff --git a/vendor/github.com/klauspost/compress/gen.sh b/vendor/github.com/klauspost/compress/gen.sh deleted file mode 100644 index aff942205..000000000 --- a/vendor/github.com/klauspost/compress/gen.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -cd s2/cmd/_s2sx/ || exit 1 -go generate . diff --git a/vendor/github.com/klauspost/compress/huff0/.gitignore b/vendor/github.com/klauspost/compress/huff0/.gitignore deleted file mode 100644 index b3d262958..000000000 --- a/vendor/github.com/klauspost/compress/huff0/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/huff0-fuzz.zip diff --git a/vendor/github.com/klauspost/compress/huff0/README.md b/vendor/github.com/klauspost/compress/huff0/README.md deleted file mode 100644 index 8b6e5c663..000000000 --- a/vendor/github.com/klauspost/compress/huff0/README.md +++ /dev/null @@ -1,89 +0,0 @@ -# Huff0 entropy compression - -This package provides Huff0 encoding and decoding as used in zstd. - -[Huff0](https://github.com/Cyan4973/FiniteStateEntropy#new-generation-entropy-coders), -a Huffman codec designed for modern CPU, featuring OoO (Out of Order) operations on multiple ALU -(Arithmetic Logic Unit), achieving extremely fast compression and decompression speeds. - -This can be used for compressing input with a lot of similar input values to the smallest number of bytes. -This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders, -but it can be used as a secondary step to compressors (like Snappy) that does not do entropy encoding. - -* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0) - -## News - -This is used as part of the [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression package. - -This ensures that most functionality is well tested. - -# Usage - -This package provides a low level interface that allows to compress single independent blocks. - -Each block is separate, and there is no built in integrity checks. -This means that the caller should keep track of block sizes and also do checksums if needed. - -Compressing a block is done via the [`Compress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress1X) and -[`Compress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress4X) functions. -You must provide input and will receive the output and maybe an error. - -These error values can be returned: - -| Error | Description | -|---------------------|-----------------------------------------------------------------------------| -| `` | Everything ok, output is returned | -| `ErrIncompressible` | Returned when input is judged to be too hard to compress | -| `ErrUseRLE` | Returned from the compressor when the input is a single byte value repeated | -| `ErrTooBig` | Returned if the input block exceeds the maximum allowed size (128 Kib) | -| `(error)` | An internal error occurred. | - - -As can be seen above some of there are errors that will be returned even under normal operation so it is important to handle these. - -To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object -that can be re-used for successive calls. Both compression and decompression accepts a `Scratch` object, and the same -object can be used for both. - -Be aware, that when re-using a `Scratch` object that the *output* buffer is also re-used, so if you are still using this -you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output. - -The `Scratch` object will retain state that allows to re-use previous tables for encoding and decoding. - -## Tables and re-use - -Huff0 allows for reusing tables from the previous block to save space if that is expected to give better/faster results. - -The Scratch object allows you to set a [`ReusePolicy`](https://godoc.org/github.com/klauspost/compress/huff0#ReusePolicy) -that controls this behaviour. See the documentation for details. This can be altered between each block. - -Do however note that this information is *not* stored in the output block and it is up to the users of the package to -record whether [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable) should be called, -based on the boolean reported back from the CompressXX call. - -If you want to store the table separate from the data, you can access them as `OutData` and `OutTable` on the -[`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object. - -## Decompressing - -The first part of decoding is to initialize the decoding table through [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable). -This will initialize the decoding tables. -You can supply the complete block to `ReadTable` and it will return the data part of the block -which can be given to the decompressor. - -Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X) -or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function. - -For concurrently decompressing content with a fixed table a stateless [`Decoder`](https://godoc.org/github.com/klauspost/compress/huff0#Decoder) can be requested which will remain correct as long as the scratch is unchanged. The capacity of the provided slice indicates the expected output size. - -You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back -your input was likely corrupted. - -It is important to note that a successful decoding does *not* mean your output matches your original input. -There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid. - -# Contributing - -Contributions are always welcome. Be aware that adding public functions will require good justification and breaking -changes will likely not be accepted. If in doubt open an issue before writing the PR. diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go deleted file mode 100644 index bfc7a523d..000000000 --- a/vendor/github.com/klauspost/compress/huff0/bitreader.go +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright 2018 Klaus Post. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. - -package huff0 - -import ( - "errors" - "fmt" - "io" - - "github.com/klauspost/compress/internal/le" -) - -// bitReader reads a bitstream in reverse. -// The last set bit indicates the start of the stream and is used -// for aligning the input. -type bitReaderBytes struct { - in []byte - off uint // next byte to read is at in[off - 1] - value uint64 - bitsRead uint8 -} - -// init initializes and resets the bit reader. -func (b *bitReaderBytes) init(in []byte) error { - if len(in) < 1 { - return errors.New("corrupt stream: too short") - } - b.in = in - b.off = uint(len(in)) - // The highest bit of the last byte indicates where to start - v := in[len(in)-1] - if v == 0 { - return errors.New("corrupt stream, did not find end of stream") - } - b.bitsRead = 64 - b.value = 0 - if len(in) >= 8 { - b.fillFastStart() - } else { - b.fill() - b.fill() - } - b.advance(8 - uint8(highBit32(uint32(v)))) - return nil -} - -// peekByteFast requires that at least one byte is requested every time. -// There are no checks if the buffer is filled. -func (b *bitReaderBytes) peekByteFast() uint8 { - got := uint8(b.value >> 56) - return got -} - -func (b *bitReaderBytes) advance(n uint8) { - b.bitsRead += n - b.value <<= n & 63 -} - -// fillFast() will make sure at least 32 bits are available. -// There must be at least 4 bytes available. -func (b *bitReaderBytes) fillFast() { - if b.bitsRead < 32 { - return - } - - // 2 bounds checks. - low := le.Load32(b.in, b.off-4) - b.value |= uint64(low) << (b.bitsRead - 32) - b.bitsRead -= 32 - b.off -= 4 -} - -// fillFastStart() assumes the bitReaderBytes is empty and there is at least 8 bytes to read. -func (b *bitReaderBytes) fillFastStart() { - // Do single re-slice to avoid bounds checks. - b.value = le.Load64(b.in, b.off-8) - b.bitsRead = 0 - b.off -= 8 -} - -// fill() will make sure at least 32 bits are available. -func (b *bitReaderBytes) fill() { - if b.bitsRead < 32 { - return - } - if b.off >= 4 { - low := le.Load32(b.in, b.off-4) - b.value |= uint64(low) << (b.bitsRead - 32) - b.bitsRead -= 32 - b.off -= 4 - return - } - for b.off > 0 { - b.value |= uint64(b.in[b.off-1]) << (b.bitsRead - 8) - b.bitsRead -= 8 - b.off-- - } -} - -// finished returns true if all bits have been read from the bit stream. -func (b *bitReaderBytes) finished() bool { - return b.off == 0 && b.bitsRead >= 64 -} - -func (b *bitReaderBytes) remaining() uint { - return b.off*8 + uint(64-b.bitsRead) -} - -// close the bitstream and returns an error if out-of-buffer reads occurred. -func (b *bitReaderBytes) close() error { - // Release reference. - b.in = nil - if b.remaining() > 0 { - return fmt.Errorf("corrupt input: %d bits remain on stream", b.remaining()) - } - if b.bitsRead > 64 { - return io.ErrUnexpectedEOF - } - return nil -} - -// bitReaderShifted reads a bitstream in reverse. -// The last set bit indicates the start of the stream and is used -// for aligning the input. -type bitReaderShifted struct { - in []byte - off uint // next byte to read is at in[off - 1] - value uint64 - bitsRead uint8 -} - -// init initializes and resets the bit reader. -func (b *bitReaderShifted) init(in []byte) error { - if len(in) < 1 { - return errors.New("corrupt stream: too short") - } - b.in = in - b.off = uint(len(in)) - // The highest bit of the last byte indicates where to start - v := in[len(in)-1] - if v == 0 { - return errors.New("corrupt stream, did not find end of stream") - } - b.bitsRead = 64 - b.value = 0 - if len(in) >= 8 { - b.fillFastStart() - } else { - b.fill() - b.fill() - } - b.advance(8 - uint8(highBit32(uint32(v)))) - return nil -} - -// peekBitsFast requires that at least one bit is requested every time. -// There are no checks if the buffer is filled. -func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 { - return uint16(b.value >> ((64 - n) & 63)) -} - -func (b *bitReaderShifted) advance(n uint8) { - b.bitsRead += n - b.value <<= n & 63 -} - -// fillFast() will make sure at least 32 bits are available. -// There must be at least 4 bytes available. -func (b *bitReaderShifted) fillFast() { - if b.bitsRead < 32 { - return - } - - low := le.Load32(b.in, b.off-4) - b.value |= uint64(low) << ((b.bitsRead - 32) & 63) - b.bitsRead -= 32 - b.off -= 4 -} - -// fillFastStart() assumes the bitReaderShifted is empty and there is at least 8 bytes to read. -func (b *bitReaderShifted) fillFastStart() { - b.value = le.Load64(b.in, b.off-8) - b.bitsRead = 0 - b.off -= 8 -} - -// fill() will make sure at least 32 bits are available. -func (b *bitReaderShifted) fill() { - if b.bitsRead < 32 { - return - } - if b.off > 4 { - low := le.Load32(b.in, b.off-4) - b.value |= uint64(low) << ((b.bitsRead - 32) & 63) - b.bitsRead -= 32 - b.off -= 4 - return - } - for b.off > 0 { - b.value |= uint64(b.in[b.off-1]) << ((b.bitsRead - 8) & 63) - b.bitsRead -= 8 - b.off-- - } -} - -func (b *bitReaderShifted) remaining() uint { - return b.off*8 + uint(64-b.bitsRead) -} - -// close the bitstream and returns an error if out-of-buffer reads occurred. -func (b *bitReaderShifted) close() error { - // Release reference. - b.in = nil - if b.remaining() > 0 { - return fmt.Errorf("corrupt input: %d bits remain on stream", b.remaining()) - } - if b.bitsRead > 64 { - return io.ErrUnexpectedEOF - } - return nil -} diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go deleted file mode 100644 index 0ebc9aaac..000000000 --- a/vendor/github.com/klauspost/compress/huff0/bitwriter.go +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2018 Klaus Post. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. - -package huff0 - -// bitWriter will write bits. -// First bit will be LSB of the first byte of output. -type bitWriter struct { - bitContainer uint64 - nBits uint8 - out []byte -} - -// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated. -// It will not check if there is space for them, so the caller must ensure that it has flushed recently. -func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { - b.bitContainer |= uint64(value) << (b.nBits & 63) - b.nBits += bits -} - -// encSymbol will add up to 16 bits. value may not contain more set bits than indicated. -// It will not check if there is space for them, so the caller must ensure that it has flushed recently. -func (b *bitWriter) encSymbol(ct cTable, symbol byte) { - enc := ct[symbol] - b.bitContainer |= uint64(enc.val) << (b.nBits & 63) - if false { - if enc.nBits == 0 { - panic("nbits 0") - } - } - b.nBits += enc.nBits -} - -// encTwoSymbols will add up to 32 bits. value may not contain more set bits than indicated. -// It will not check if there is space for them, so the caller must ensure that it has flushed recently. -func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) { - encA := ct[av] - encB := ct[bv] - sh := b.nBits & 63 - combined := uint64(encA.val) | (uint64(encB.val) << (encA.nBits & 63)) - b.bitContainer |= combined << sh - if false { - if encA.nBits == 0 { - panic("nbitsA 0") - } - if encB.nBits == 0 { - panic("nbitsB 0") - } - } - b.nBits += encA.nBits + encB.nBits -} - -// encFourSymbols adds up to 32 bits from four symbols. -// It will not check if there is space for them, -// so the caller must ensure that b has been flushed recently. -func (b *bitWriter) encFourSymbols(encA, encB, encC, encD cTableEntry) { - bitsA := encA.nBits - bitsB := bitsA + encB.nBits - bitsC := bitsB + encC.nBits - bitsD := bitsC + encD.nBits - combined := uint64(encA.val) | - (uint64(encB.val) << (bitsA & 63)) | - (uint64(encC.val) << (bitsB & 63)) | - (uint64(encD.val) << (bitsC & 63)) - b.bitContainer |= combined << (b.nBits & 63) - b.nBits += bitsD -} - -// flush32 will flush out, so there are at least 32 bits available for writing. -func (b *bitWriter) flush32() { - if b.nBits < 32 { - return - } - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24)) - b.nBits -= 32 - b.bitContainer >>= 32 -} - -// flushAlign will flush remaining full bytes and align to next byte boundary. -func (b *bitWriter) flushAlign() { - nbBytes := (b.nBits + 7) >> 3 - for i := uint8(0); i < nbBytes; i++ { - b.out = append(b.out, byte(b.bitContainer>>(i*8))) - } - b.nBits = 0 - b.bitContainer = 0 -} - -// close will write the alignment bit and write the final byte(s) -// to the output. -func (b *bitWriter) close() { - // End mark - b.addBits16Clean(1, 1) - // flush until next byte. - b.flushAlign() -} diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go deleted file mode 100644 index 84aa3d12f..000000000 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ /dev/null @@ -1,742 +0,0 @@ -package huff0 - -import ( - "fmt" - "math" - "runtime" - "sync" -) - -// Compress1X will compress the input. -// The output can be decoded using Decompress1X. -// Supply a Scratch object. The scratch object contains state about re-use, -// So when sharing across independent encodes, be sure to set the re-use policy. -func Compress1X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) { - s, err = s.prepare(in) - if err != nil { - return nil, false, err - } - return compress(in, s, s.compress1X) -} - -// Compress4X will compress the input. The input is split into 4 independent blocks -// and compressed similar to Compress1X. -// The output can be decoded using Decompress4X. -// Supply a Scratch object. The scratch object contains state about re-use, -// So when sharing across independent encodes, be sure to set the re-use policy. -func Compress4X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) { - s, err = s.prepare(in) - if err != nil { - return nil, false, err - } - if false { - // TODO: compress4Xp only slightly faster. - const parallelThreshold = 8 << 10 - if len(in) < parallelThreshold || runtime.GOMAXPROCS(0) == 1 { - return compress(in, s, s.compress4X) - } - return compress(in, s, s.compress4Xp) - } - return compress(in, s, s.compress4X) -} - -func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)) (out []byte, reUsed bool, err error) { - // Nuke previous table if we cannot reuse anyway. - if s.Reuse == ReusePolicyNone { - s.prevTable = s.prevTable[:0] - } - - // Create histogram, if none was provided. - maxCount := s.maxCount - var canReuse = false - if maxCount == 0 { - maxCount, canReuse = s.countSimple(in) - } else { - canReuse = s.canUseTable(s.prevTable) - } - - // We want the output size to be less than this: - wantSize := len(in) - if s.WantLogLess > 0 { - wantSize -= wantSize >> s.WantLogLess - } - - // Reset for next run. - s.clearCount = true - s.maxCount = 0 - if maxCount >= len(in) { - if maxCount > len(in) { - return nil, false, fmt.Errorf("maxCount (%d) > length (%d)", maxCount, len(in)) - } - if len(in) == 1 { - return nil, false, ErrIncompressible - } - // One symbol, use RLE - return nil, false, ErrUseRLE - } - if maxCount == 1 || maxCount < (len(in)>>7) { - // Each symbol present maximum once or too well distributed. - return nil, false, ErrIncompressible - } - if s.Reuse == ReusePolicyMust && !canReuse { - // We must reuse, but we can't. - return nil, false, ErrIncompressible - } - if (s.Reuse == ReusePolicyPrefer || s.Reuse == ReusePolicyMust) && canReuse { - keepTable := s.cTable - keepTL := s.actualTableLog - s.cTable = s.prevTable - s.actualTableLog = s.prevTableLog - s.Out, err = compressor(in) - s.cTable = keepTable - s.actualTableLog = keepTL - if err == nil && len(s.Out) < wantSize { - s.OutData = s.Out - return s.Out, true, nil - } - if s.Reuse == ReusePolicyMust { - return nil, false, ErrIncompressible - } - // Do not attempt to re-use later. - s.prevTable = s.prevTable[:0] - } - - // Calculate new table. - err = s.buildCTable() - if err != nil { - return nil, false, err - } - - if false && !s.canUseTable(s.cTable) { - panic("invalid table generated") - } - - if s.Reuse == ReusePolicyAllow && canReuse { - hSize := len(s.Out) - oldSize := s.prevTable.estimateSize(s.count[:s.symbolLen]) - newSize := s.cTable.estimateSize(s.count[:s.symbolLen]) - if oldSize <= hSize+newSize || hSize+12 >= wantSize { - // Retain cTable even if we re-use. - keepTable := s.cTable - keepTL := s.actualTableLog - - s.cTable = s.prevTable - s.actualTableLog = s.prevTableLog - s.Out, err = compressor(in) - - // Restore ctable. - s.cTable = keepTable - s.actualTableLog = keepTL - if err != nil { - return nil, false, err - } - if len(s.Out) >= wantSize { - return nil, false, ErrIncompressible - } - s.OutData = s.Out - return s.Out, true, nil - } - } - - // Use new table - err = s.cTable.write(s) - if err != nil { - s.OutTable = nil - return nil, false, err - } - s.OutTable = s.Out - - // Compress using new table - s.Out, err = compressor(in) - if err != nil { - s.OutTable = nil - return nil, false, err - } - if len(s.Out) >= wantSize { - s.OutTable = nil - return nil, false, ErrIncompressible - } - // Move current table into previous. - s.prevTable, s.prevTableLog, s.cTable = s.cTable, s.actualTableLog, s.prevTable[:0] - s.OutData = s.Out[len(s.OutTable):] - return s.Out, false, nil -} - -// EstimateSizes will estimate the data sizes -func EstimateSizes(in []byte, s *Scratch) (tableSz, dataSz, reuseSz int, err error) { - s, err = s.prepare(in) - if err != nil { - return 0, 0, 0, err - } - - // Create histogram, if none was provided. - tableSz, dataSz, reuseSz = -1, -1, -1 - maxCount := s.maxCount - var canReuse = false - if maxCount == 0 { - maxCount, canReuse = s.countSimple(in) - } else { - canReuse = s.canUseTable(s.prevTable) - } - - // We want the output size to be less than this: - wantSize := len(in) - if s.WantLogLess > 0 { - wantSize -= wantSize >> s.WantLogLess - } - - // Reset for next run. - s.clearCount = true - s.maxCount = 0 - if maxCount >= len(in) { - if maxCount > len(in) { - return 0, 0, 0, fmt.Errorf("maxCount (%d) > length (%d)", maxCount, len(in)) - } - if len(in) == 1 { - return 0, 0, 0, ErrIncompressible - } - // One symbol, use RLE - return 0, 0, 0, ErrUseRLE - } - if maxCount == 1 || maxCount < (len(in)>>7) { - // Each symbol present maximum once or too well distributed. - return 0, 0, 0, ErrIncompressible - } - - // Calculate new table. - err = s.buildCTable() - if err != nil { - return 0, 0, 0, err - } - - if false && !s.canUseTable(s.cTable) { - panic("invalid table generated") - } - - tableSz, err = s.cTable.estTableSize(s) - if err != nil { - return 0, 0, 0, err - } - if canReuse { - reuseSz = s.prevTable.estimateSize(s.count[:s.symbolLen]) - } - dataSz = s.cTable.estimateSize(s.count[:s.symbolLen]) - - // Restore - return tableSz, dataSz, reuseSz, nil -} - -func (s *Scratch) compress1X(src []byte) ([]byte, error) { - return s.compress1xDo(s.Out, src), nil -} - -func (s *Scratch) compress1xDo(dst, src []byte) []byte { - var bw = bitWriter{out: dst} - - // N is length divisible by 4. - n := len(src) - n -= n & 3 - cTable := s.cTable[:256] - - // Encode last bytes. - for i := len(src) & 3; i > 0; i-- { - bw.encSymbol(cTable, src[n+i-1]) - } - n -= 4 - if s.actualTableLog <= 8 { - for ; n >= 0; n -= 4 { - tmp := src[n : n+4] - // tmp should be len 4 - bw.flush32() - bw.encFourSymbols(cTable[tmp[3]], cTable[tmp[2]], cTable[tmp[1]], cTable[tmp[0]]) - } - } else { - for ; n >= 0; n -= 4 { - tmp := src[n : n+4] - // tmp should be len 4 - bw.flush32() - bw.encTwoSymbols(cTable, tmp[3], tmp[2]) - bw.flush32() - bw.encTwoSymbols(cTable, tmp[1], tmp[0]) - } - } - bw.close() - return bw.out -} - -var sixZeros [6]byte - -func (s *Scratch) compress4X(src []byte) ([]byte, error) { - if len(src) < 12 { - return nil, ErrIncompressible - } - segmentSize := (len(src) + 3) / 4 - - // Add placeholder for output length - offsetIdx := len(s.Out) - s.Out = append(s.Out, sixZeros[:]...) - - for i := 0; i < 4; i++ { - toDo := src - if len(toDo) > segmentSize { - toDo = toDo[:segmentSize] - } - src = src[len(toDo):] - - idx := len(s.Out) - s.Out = s.compress1xDo(s.Out, toDo) - if len(s.Out)-idx > math.MaxUint16 { - // We cannot store the size in the jump table - return nil, ErrIncompressible - } - // Write compressed length as little endian before block. - if i < 3 { - // Last length is not written. - length := len(s.Out) - idx - s.Out[i*2+offsetIdx] = byte(length) - s.Out[i*2+offsetIdx+1] = byte(length >> 8) - } - } - - return s.Out, nil -} - -// compress4Xp will compress 4 streams using separate goroutines. -func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { - if len(src) < 12 { - return nil, ErrIncompressible - } - // Add placeholder for output length - s.Out = s.Out[:6] - - segmentSize := (len(src) + 3) / 4 - var wg sync.WaitGroup - wg.Add(4) - for i := 0; i < 4; i++ { - toDo := src - if len(toDo) > segmentSize { - toDo = toDo[:segmentSize] - } - src = src[len(toDo):] - - // Separate goroutine for each block. - go func(i int) { - s.tmpOut[i] = s.compress1xDo(s.tmpOut[i][:0], toDo) - wg.Done() - }(i) - } - wg.Wait() - for i := 0; i < 4; i++ { - o := s.tmpOut[i] - if len(o) > math.MaxUint16 { - // We cannot store the size in the jump table - return nil, ErrIncompressible - } - // Write compressed length as little endian before block. - if i < 3 { - // Last length is not written. - s.Out[i*2] = byte(len(o)) - s.Out[i*2+1] = byte(len(o) >> 8) - } - - // Write output. - s.Out = append(s.Out, o...) - } - return s.Out, nil -} - -// countSimple will create a simple histogram in s.count. -// Returns the biggest count. -// Does not update s.clearCount. -func (s *Scratch) countSimple(in []byte) (max int, reuse bool) { - reuse = true - _ = s.count // Assert that s != nil to speed up the following loop. - for _, v := range in { - s.count[v]++ - } - m := uint32(0) - if len(s.prevTable) > 0 { - for i, v := range s.count[:] { - if v == 0 { - continue - } - if v > m { - m = v - } - s.symbolLen = uint16(i) + 1 - if i >= len(s.prevTable) { - reuse = false - } else if s.prevTable[i].nBits == 0 { - reuse = false - } - } - return int(m), reuse - } - for i, v := range s.count[:] { - if v == 0 { - continue - } - if v > m { - m = v - } - s.symbolLen = uint16(i) + 1 - } - return int(m), false -} - -func (s *Scratch) canUseTable(c cTable) bool { - if len(c) < int(s.symbolLen) { - return false - } - for i, v := range s.count[:s.symbolLen] { - if v != 0 && c[i].nBits == 0 { - return false - } - } - return true -} - -//lint:ignore U1000 used for debugging -func (s *Scratch) validateTable(c cTable) bool { - if len(c) < int(s.symbolLen) { - return false - } - for i, v := range s.count[:s.symbolLen] { - if v != 0 { - if c[i].nBits == 0 { - return false - } - if c[i].nBits > s.actualTableLog { - return false - } - } - } - return true -} - -// minTableLog provides the minimum logSize to safely represent a distribution. -func (s *Scratch) minTableLog() uint8 { - minBitsSrc := highBit32(uint32(s.srcLen)) + 1 - minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2 - if minBitsSrc < minBitsSymbols { - return uint8(minBitsSrc) - } - return uint8(minBitsSymbols) -} - -// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog -func (s *Scratch) optimalTableLog() { - tableLog := s.TableLog - minBits := s.minTableLog() - maxBitsSrc := uint8(highBit32(uint32(s.srcLen-1))) - 1 - if maxBitsSrc < tableLog { - // Accuracy can be reduced - tableLog = maxBitsSrc - } - if minBits > tableLog { - tableLog = minBits - } - // Need a minimum to safely represent all symbol values - if tableLog < minTablelog { - tableLog = minTablelog - } - if tableLog > tableLogMax { - tableLog = tableLogMax - } - s.actualTableLog = tableLog -} - -type cTableEntry struct { - val uint16 - nBits uint8 - // We have 8 bits extra -} - -const huffNodesMask = huffNodesLen - 1 - -func (s *Scratch) buildCTable() error { - s.optimalTableLog() - s.huffSort() - if cap(s.cTable) < maxSymbolValue+1 { - s.cTable = make([]cTableEntry, s.symbolLen, maxSymbolValue+1) - } else { - s.cTable = s.cTable[:s.symbolLen] - for i := range s.cTable { - s.cTable[i] = cTableEntry{} - } - } - - var startNode = int16(s.symbolLen) - nonNullRank := s.symbolLen - 1 - - nodeNb := startNode - huffNode := s.nodes[1 : huffNodesLen+1] - - // This overlays the slice above, but allows "-1" index lookups. - // Different from reference implementation. - huffNode0 := s.nodes[0 : huffNodesLen+1] - - for huffNode[nonNullRank].count() == 0 { - nonNullRank-- - } - - lowS := int16(nonNullRank) - nodeRoot := nodeNb + lowS - 1 - lowN := nodeNb - huffNode[nodeNb].setCount(huffNode[lowS].count() + huffNode[lowS-1].count()) - huffNode[lowS].setParent(nodeNb) - huffNode[lowS-1].setParent(nodeNb) - nodeNb++ - lowS -= 2 - for n := nodeNb; n <= nodeRoot; n++ { - huffNode[n].setCount(1 << 30) - } - // fake entry, strong barrier - huffNode0[0].setCount(1 << 31) - - // create parents - for nodeNb <= nodeRoot { - var n1, n2 int16 - if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() { - n1 = lowS - lowS-- - } else { - n1 = lowN - lowN++ - } - if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() { - n2 = lowS - lowS-- - } else { - n2 = lowN - lowN++ - } - - huffNode[nodeNb].setCount(huffNode0[n1+1].count() + huffNode0[n2+1].count()) - huffNode0[n1+1].setParent(nodeNb) - huffNode0[n2+1].setParent(nodeNb) - nodeNb++ - } - - // distribute weights (unlimited tree height) - huffNode[nodeRoot].setNbBits(0) - for n := nodeRoot - 1; n >= startNode; n-- { - huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1) - } - for n := uint16(0); n <= nonNullRank; n++ { - huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1) - } - s.actualTableLog = s.setMaxHeight(int(nonNullRank)) - maxNbBits := s.actualTableLog - - // fill result into tree (val, nbBits) - if maxNbBits > tableLogMax { - return fmt.Errorf("internal error: maxNbBits (%d) > tableLogMax (%d)", maxNbBits, tableLogMax) - } - var nbPerRank [tableLogMax + 1]uint16 - var valPerRank [16]uint16 - for _, v := range huffNode[:nonNullRank+1] { - nbPerRank[v.nbBits()]++ - } - // determine stating value per rank - { - min := uint16(0) - for n := maxNbBits; n > 0; n-- { - // get starting value within each rank - valPerRank[n] = min - min += nbPerRank[n] - min >>= 1 - } - } - - // push nbBits per symbol, symbol order - for _, v := range huffNode[:nonNullRank+1] { - s.cTable[v.symbol()].nBits = v.nbBits() - } - - // assign value within rank, symbol order - t := s.cTable[:s.symbolLen] - for n, val := range t { - nbits := val.nBits & 15 - v := valPerRank[nbits] - t[n].val = v - valPerRank[nbits] = v + 1 - } - - return nil -} - -// huffSort will sort symbols, decreasing order. -func (s *Scratch) huffSort() { - type rankPos struct { - base uint32 - current uint32 - } - - // Clear nodes - nodes := s.nodes[:huffNodesLen+1] - s.nodes = nodes - nodes = nodes[1 : huffNodesLen+1] - - // Sort into buckets based on length of symbol count. - var rank [32]rankPos - for _, v := range s.count[:s.symbolLen] { - r := highBit32(v+1) & 31 - rank[r].base++ - } - // maxBitLength is log2(BlockSizeMax) + 1 - const maxBitLength = 18 + 1 - for n := maxBitLength; n > 0; n-- { - rank[n-1].base += rank[n].base - } - for n := range rank[:maxBitLength] { - rank[n].current = rank[n].base - } - for n, c := range s.count[:s.symbolLen] { - r := (highBit32(c+1) + 1) & 31 - pos := rank[r].current - rank[r].current++ - prev := nodes[(pos-1)&huffNodesMask] - for pos > rank[r].base && c > prev.count() { - nodes[pos&huffNodesMask] = prev - pos-- - prev = nodes[(pos-1)&huffNodesMask] - } - nodes[pos&huffNodesMask] = makeNodeElt(c, byte(n)) - } -} - -func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { - maxNbBits := s.actualTableLog - huffNode := s.nodes[1 : huffNodesLen+1] - //huffNode = huffNode[: huffNodesLen] - - largestBits := huffNode[lastNonNull].nbBits() - - // early exit : no elt > maxNbBits - if largestBits <= maxNbBits { - return largestBits - } - totalCost := int(0) - baseCost := int(1) << (largestBits - maxNbBits) - n := uint32(lastNonNull) - - for huffNode[n].nbBits() > maxNbBits { - totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits())) - huffNode[n].setNbBits(maxNbBits) - n-- - } - // n stops at huffNode[n].nbBits <= maxNbBits - - for huffNode[n].nbBits() == maxNbBits { - n-- - } - // n end at index of smallest symbol using < maxNbBits - - // renorm totalCost - totalCost >>= largestBits - maxNbBits /* note : totalCost is necessarily a multiple of baseCost */ - - // repay normalized cost - { - const noSymbol = 0xF0F0F0F0 - var rankLast [tableLogMax + 2]uint32 - - for i := range rankLast[:] { - rankLast[i] = noSymbol - } - - // Get pos of last (smallest) symbol per rank - { - currentNbBits := maxNbBits - for pos := int(n); pos >= 0; pos-- { - if huffNode[pos].nbBits() >= currentNbBits { - continue - } - currentNbBits = huffNode[pos].nbBits() // < maxNbBits - rankLast[maxNbBits-currentNbBits] = uint32(pos) - } - } - - for totalCost > 0 { - nBitsToDecrease := uint8(highBit32(uint32(totalCost))) + 1 - - for ; nBitsToDecrease > 1; nBitsToDecrease-- { - highPos := rankLast[nBitsToDecrease] - lowPos := rankLast[nBitsToDecrease-1] - if highPos == noSymbol { - continue - } - if lowPos == noSymbol { - break - } - highTotal := huffNode[highPos].count() - lowTotal := 2 * huffNode[lowPos].count() - if highTotal <= lowTotal { - break - } - } - // only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) - // HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary - // FIXME: try to remove - for (nBitsToDecrease <= tableLogMax) && (rankLast[nBitsToDecrease] == noSymbol) { - nBitsToDecrease++ - } - totalCost -= 1 << (nBitsToDecrease - 1) - if rankLast[nBitsToDecrease-1] == noSymbol { - // this rank is no longer empty - rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease] - } - huffNode[rankLast[nBitsToDecrease]].setNbBits(1 + - huffNode[rankLast[nBitsToDecrease]].nbBits()) - if rankLast[nBitsToDecrease] == 0 { - /* special case, reached largest symbol */ - rankLast[nBitsToDecrease] = noSymbol - } else { - rankLast[nBitsToDecrease]-- - if huffNode[rankLast[nBitsToDecrease]].nbBits() != maxNbBits-nBitsToDecrease { - rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */ - } - } - } - - for totalCost < 0 { /* Sometimes, cost correction overshoot */ - if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ - for huffNode[n].nbBits() == maxNbBits { - n-- - } - huffNode[n+1].setNbBits(huffNode[n+1].nbBits() - 1) - rankLast[1] = n + 1 - totalCost++ - continue - } - huffNode[rankLast[1]+1].setNbBits(huffNode[rankLast[1]+1].nbBits() - 1) - rankLast[1]++ - totalCost++ - } - } - return maxNbBits -} - -// A nodeElt is the fields -// -// count uint32 -// parent uint16 -// symbol byte -// nbBits uint8 -// -// in some order, all squashed into an integer so that the compiler -// always loads and stores entire nodeElts instead of separate fields. -type nodeElt uint64 - -func makeNodeElt(count uint32, symbol byte) nodeElt { - return nodeElt(count) | nodeElt(symbol)<<48 -} - -func (e *nodeElt) count() uint32 { return uint32(*e) } -func (e *nodeElt) parent() uint16 { return uint16(*e >> 32) } -func (e *nodeElt) symbol() byte { return byte(*e >> 48) } -func (e *nodeElt) nbBits() uint8 { return uint8(*e >> 56) } - -func (e *nodeElt) setCount(c uint32) { *e = (*e)&0xffffffff00000000 | nodeElt(c) } -func (e *nodeElt) setParent(p int16) { *e = (*e)&0xffff0000ffffffff | nodeElt(uint16(p))<<32 } -func (e *nodeElt) setNbBits(n uint8) { *e = (*e)&0x00ffffffffffffff | nodeElt(n)<<56 } diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go deleted file mode 100644 index 0f56b02d7..000000000 --- a/vendor/github.com/klauspost/compress/huff0/decompress.go +++ /dev/null @@ -1,1167 +0,0 @@ -package huff0 - -import ( - "errors" - "fmt" - "io" - "sync" - - "github.com/klauspost/compress/fse" -) - -type dTable struct { - single []dEntrySingle -} - -// single-symbols decoding -type dEntrySingle struct { - entry uint16 -} - -// Uses special code for all tables that are < 8 bits. -const use8BitTables = true - -// ReadTable will read a table from the input. -// The size of the input may be larger than the table definition. -// Any content remaining after the table definition will be returned. -// If no Scratch is provided a new one is allocated. -// The returned Scratch can be used for encoding or decoding input using this table. -func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { - s, err = s.prepare(nil) - if err != nil { - return s, nil, err - } - if len(in) <= 1 { - return s, nil, errors.New("input too small for table") - } - iSize := in[0] - in = in[1:] - if iSize >= 128 { - // Uncompressed - oSize := iSize - 127 - iSize = (oSize + 1) / 2 - if int(iSize) > len(in) { - return s, nil, errors.New("input too small for table") - } - for n := uint8(0); n < oSize; n += 2 { - v := in[n/2] - s.huffWeight[n] = v >> 4 - s.huffWeight[n+1] = v & 15 - } - s.symbolLen = uint16(oSize) - in = in[iSize:] - } else { - if len(in) < int(iSize) { - return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in)) - } - // FSE compressed weights - s.fse.DecompressLimit = 255 - hw := s.huffWeight[:] - s.fse.Out = hw - b, err := fse.Decompress(in[:iSize], s.fse) - s.fse.Out = nil - if err != nil { - return s, nil, fmt.Errorf("fse decompress returned: %w", err) - } - if len(b) > 255 { - return s, nil, errors.New("corrupt input: output table too large") - } - s.symbolLen = uint16(len(b)) - in = in[iSize:] - } - - // collect weight stats - var rankStats [16]uint32 - weightTotal := uint32(0) - for _, v := range s.huffWeight[:s.symbolLen] { - if v > tableLogMax { - return s, nil, errors.New("corrupt input: weight too large") - } - v2 := v & 15 - rankStats[v2]++ - // (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0. - weightTotal += (1 << v2) >> 1 - } - if weightTotal == 0 { - return s, nil, errors.New("corrupt input: weights zero") - } - - // get last non-null symbol weight (implied, total must be 2^n) - { - tableLog := highBit32(weightTotal) + 1 - if tableLog > tableLogMax { - return s, nil, errors.New("corrupt input: tableLog too big") - } - s.actualTableLog = uint8(tableLog) - // determine last weight - { - total := uint32(1) << tableLog - rest := total - weightTotal - verif := uint32(1) << highBit32(rest) - lastWeight := highBit32(rest) + 1 - if verif != rest { - // last value must be a clean power of 2 - return s, nil, errors.New("corrupt input: last value not power of two") - } - s.huffWeight[s.symbolLen] = uint8(lastWeight) - s.symbolLen++ - rankStats[lastWeight]++ - } - } - - if (rankStats[1] < 2) || (rankStats[1]&1 != 0) { - // by construction : at least 2 elts of rank 1, must be even - return s, nil, errors.New("corrupt input: min elt size, even check failed ") - } - - // TODO: Choose between single/double symbol decoding - - // Calculate starting value for each rank - { - var nextRankStart uint32 - for n := uint8(1); n < s.actualTableLog+1; n++ { - current := nextRankStart - nextRankStart += rankStats[n] << (n - 1) - rankStats[n] = current - } - } - - // fill DTable (always full size) - tSize := 1 << tableLogMax - if len(s.dt.single) != tSize { - s.dt.single = make([]dEntrySingle, tSize) - } - cTable := s.prevTable - if cap(cTable) < maxSymbolValue+1 { - cTable = make([]cTableEntry, 0, maxSymbolValue+1) - } - cTable = cTable[:maxSymbolValue+1] - s.prevTable = cTable[:s.symbolLen] - s.prevTableLog = s.actualTableLog - - for n, w := range s.huffWeight[:s.symbolLen] { - if w == 0 { - cTable[n] = cTableEntry{ - val: 0, - nBits: 0, - } - continue - } - length := (uint32(1) << w) >> 1 - d := dEntrySingle{ - entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8), - } - - rank := &rankStats[w] - cTable[n] = cTableEntry{ - val: uint16(*rank >> (w - 1)), - nBits: uint8(d.entry), - } - - single := s.dt.single[*rank : *rank+length] - for i := range single { - single[i] = d - } - *rank += length - } - - return s, in, nil -} - -// Decompress1X will decompress a 1X encoded stream. -// The length of the supplied input must match the end of a block exactly. -// Before this is called, the table must be initialized with ReadTable unless -// the encoder re-used the table. -// deprecated: Use the stateless Decoder() to get a concurrent version. -func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) { - if cap(s.Out) < s.MaxDecodedSize { - s.Out = make([]byte, s.MaxDecodedSize) - } - s.Out = s.Out[:0:s.MaxDecodedSize] - s.Out, err = s.Decoder().Decompress1X(s.Out, in) - return s.Out, err -} - -// Decompress4X will decompress a 4X encoded stream. -// Before this is called, the table must be initialized with ReadTable unless -// the encoder re-used the table. -// The length of the supplied input must match the end of a block exactly. -// The destination size of the uncompressed data must be known and provided. -// deprecated: Use the stateless Decoder() to get a concurrent version. -func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) { - if dstSize > s.MaxDecodedSize { - return nil, ErrMaxDecodedSizeExceeded - } - if cap(s.Out) < dstSize { - s.Out = make([]byte, s.MaxDecodedSize) - } - s.Out = s.Out[:0:dstSize] - s.Out, err = s.Decoder().Decompress4X(s.Out, in) - return s.Out, err -} - -// Decoder will return a stateless decoder that can be used by multiple -// decompressors concurrently. -// Before this is called, the table must be initialized with ReadTable. -// The Decoder is still linked to the scratch buffer so that cannot be reused. -// However, it is safe to discard the scratch. -func (s *Scratch) Decoder() *Decoder { - return &Decoder{ - dt: s.dt, - actualTableLog: s.actualTableLog, - bufs: &s.decPool, - } -} - -// Decoder provides stateless decoding. -type Decoder struct { - dt dTable - actualTableLog uint8 - bufs *sync.Pool -} - -func (d *Decoder) buffer() *[4][256]byte { - buf, ok := d.bufs.Get().(*[4][256]byte) - if ok { - return buf - } - return &[4][256]byte{} -} - -// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8. -// The cap of the output buffer will be the maximum decompressed size. -// The length of the supplied input must match the end of a block exactly. -func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { - if d.actualTableLog == 8 { - return d.decompress1X8BitExactly(dst, src) - } - var br bitReaderBytes - err := br.init(src) - if err != nil { - return dst, err - } - maxDecodedSize := cap(dst) - dst = dst[:0] - - // Avoid bounds check by always having full sized table. - dt := d.dt.single[:256] - - // Use temp table to avoid bound checks/append penalty. - bufs := d.buffer() - buf := &bufs[0] - var off uint8 - - switch d.actualTableLog { - case 8: - const shift = 0 - for br.off >= 4 { - br.fillFast() - v := dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+0] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+1] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+2] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+3] = uint8(v.entry >> 8) - - off += 4 - if off == 0 { - if len(dst)+256 > maxDecodedSize { - br.close() - d.bufs.Put(bufs) - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:]...) - } - } - case 7: - const shift = 8 - 7 - for br.off >= 4 { - br.fillFast() - v := dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+0] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+1] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+2] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+3] = uint8(v.entry >> 8) - - off += 4 - if off == 0 { - if len(dst)+256 > maxDecodedSize { - br.close() - d.bufs.Put(bufs) - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:]...) - } - } - case 6: - const shift = 8 - 6 - for br.off >= 4 { - br.fillFast() - v := dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+0] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+1] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+2] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+3] = uint8(v.entry >> 8) - - off += 4 - if off == 0 { - if len(dst)+256 > maxDecodedSize { - d.bufs.Put(bufs) - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:]...) - } - } - case 5: - const shift = 8 - 5 - for br.off >= 4 { - br.fillFast() - v := dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+0] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+1] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+2] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+3] = uint8(v.entry >> 8) - - off += 4 - if off == 0 { - if len(dst)+256 > maxDecodedSize { - d.bufs.Put(bufs) - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:]...) - } - } - case 4: - const shift = 8 - 4 - for br.off >= 4 { - br.fillFast() - v := dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+0] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+1] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+2] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+3] = uint8(v.entry >> 8) - - off += 4 - if off == 0 { - if len(dst)+256 > maxDecodedSize { - d.bufs.Put(bufs) - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:]...) - } - } - case 3: - const shift = 8 - 3 - for br.off >= 4 { - br.fillFast() - v := dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+0] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+1] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+2] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+3] = uint8(v.entry >> 8) - - off += 4 - if off == 0 { - if len(dst)+256 > maxDecodedSize { - d.bufs.Put(bufs) - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:]...) - } - } - case 2: - const shift = 8 - 2 - for br.off >= 4 { - br.fillFast() - v := dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+0] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+1] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+2] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+3] = uint8(v.entry >> 8) - - off += 4 - if off == 0 { - if len(dst)+256 > maxDecodedSize { - d.bufs.Put(bufs) - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:]...) - } - } - case 1: - const shift = 8 - 1 - for br.off >= 4 { - br.fillFast() - v := dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+0] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+1] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+2] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>(56+shift))] - br.advance(uint8(v.entry)) - buf[off+3] = uint8(v.entry >> 8) - - off += 4 - if off == 0 { - if len(dst)+256 > maxDecodedSize { - d.bufs.Put(bufs) - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:]...) - } - } - default: - d.bufs.Put(bufs) - return nil, fmt.Errorf("invalid tablelog: %d", d.actualTableLog) - } - - if len(dst)+int(off) > maxDecodedSize { - d.bufs.Put(bufs) - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:off]...) - - // br < 4, so uint8 is fine - bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead)) - shift := (8 - d.actualTableLog) & 7 - - for bitsLeft > 0 { - if br.bitsRead >= 64-8 { - for br.off > 0 { - br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) - br.bitsRead -= 8 - br.off-- - } - } - if len(dst) >= maxDecodedSize { - br.close() - d.bufs.Put(bufs) - return nil, ErrMaxDecodedSizeExceeded - } - v := dt[br.peekByteFast()>>shift] - nBits := uint8(v.entry) - br.advance(nBits) - bitsLeft -= int8(nBits) - dst = append(dst, uint8(v.entry>>8)) - } - d.bufs.Put(bufs) - return dst, br.close() -} - -// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8. -// The cap of the output buffer will be the maximum decompressed size. -// The length of the supplied input must match the end of a block exactly. -func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { - var br bitReaderBytes - err := br.init(src) - if err != nil { - return dst, err - } - maxDecodedSize := cap(dst) - dst = dst[:0] - - // Avoid bounds check by always having full sized table. - dt := d.dt.single[:256] - - // Use temp table to avoid bound checks/append penalty. - bufs := d.buffer() - buf := &bufs[0] - var off uint8 - - const shift = 56 - - //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog) - for br.off >= 4 { - br.fillFast() - v := dt[uint8(br.value>>shift)] - br.advance(uint8(v.entry)) - buf[off+0] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>shift)] - br.advance(uint8(v.entry)) - buf[off+1] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>shift)] - br.advance(uint8(v.entry)) - buf[off+2] = uint8(v.entry >> 8) - - v = dt[uint8(br.value>>shift)] - br.advance(uint8(v.entry)) - buf[off+3] = uint8(v.entry >> 8) - - off += 4 - if off == 0 { - if len(dst)+256 > maxDecodedSize { - d.bufs.Put(bufs) - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:]...) - } - } - - if len(dst)+int(off) > maxDecodedSize { - d.bufs.Put(bufs) - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:off]...) - - // br < 4, so uint8 is fine - bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead)) - for bitsLeft > 0 { - if br.bitsRead >= 64-8 { - for br.off > 0 { - br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) - br.bitsRead -= 8 - br.off-- - } - } - if len(dst) >= maxDecodedSize { - d.bufs.Put(bufs) - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - v := dt[br.peekByteFast()] - nBits := uint8(v.entry) - br.advance(nBits) - bitsLeft -= int8(nBits) - dst = append(dst, uint8(v.entry>>8)) - } - d.bufs.Put(bufs) - return dst, br.close() -} - -// Decompress4X will decompress a 4X encoded stream. -// The length of the supplied input must match the end of a block exactly. -// The *capacity* of the dst slice must match the destination size of -// the uncompressed data exactly. -func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { - if d.actualTableLog == 8 { - return d.decompress4X8bitExactly(dst, src) - } - - var br [4]bitReaderBytes - start := 6 - for i := 0; i < 3; i++ { - length := int(src[i*2]) | (int(src[i*2+1]) << 8) - if start+length >= len(src) { - return nil, errors.New("truncated input (or invalid offset)") - } - err := br[i].init(src[start : start+length]) - if err != nil { - return nil, err - } - start += length - } - err := br[3].init(src[start:]) - if err != nil { - return nil, err - } - - // destination, offset to match first output - dstSize := cap(dst) - dst = dst[:dstSize] - out := dst - dstEvery := (dstSize + 3) / 4 - - shift := (56 + (8 - d.actualTableLog)) & 63 - - const tlSize = 1 << 8 - single := d.dt.single[:tlSize] - - // Use temp table to avoid bound checks/append penalty. - buf := d.buffer() - var off uint8 - var decoded int - - // Decode 4 values from each decoder/loop. - const bufoff = 256 - for { - if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { - break - } - - { - // Interleave 2 decodes. - const stream = 0 - const stream2 = 1 - br1 := &br[stream] - br2 := &br[stream2] - br1.fillFast() - br2.fillFast() - - v := single[uint8(br1.value>>shift)].entry - v2 := single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off] = uint8(v >> 8) - buf[stream2][off] = uint8(v2 >> 8) - - v = single[uint8(br1.value>>shift)].entry - v2 = single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off+1] = uint8(v >> 8) - buf[stream2][off+1] = uint8(v2 >> 8) - - v = single[uint8(br1.value>>shift)].entry - v2 = single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off+2] = uint8(v >> 8) - buf[stream2][off+2] = uint8(v2 >> 8) - - v = single[uint8(br1.value>>shift)].entry - v2 = single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off+3] = uint8(v >> 8) - buf[stream2][off+3] = uint8(v2 >> 8) - } - - { - const stream = 2 - const stream2 = 3 - br1 := &br[stream] - br2 := &br[stream2] - br1.fillFast() - br2.fillFast() - - v := single[uint8(br1.value>>shift)].entry - v2 := single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off] = uint8(v >> 8) - buf[stream2][off] = uint8(v2 >> 8) - - v = single[uint8(br1.value>>shift)].entry - v2 = single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off+1] = uint8(v >> 8) - buf[stream2][off+1] = uint8(v2 >> 8) - - v = single[uint8(br1.value>>shift)].entry - v2 = single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off+2] = uint8(v >> 8) - buf[stream2][off+2] = uint8(v2 >> 8) - - v = single[uint8(br1.value>>shift)].entry - v2 = single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off+3] = uint8(v >> 8) - buf[stream2][off+3] = uint8(v2 >> 8) - } - - off += 4 - - if off == 0 { - if bufoff > dstEvery { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 1") - } - // There must at least be 3 buffers left. - if len(out)-bufoff < dstEvery*3 { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 2") - } - //copy(out, buf[0][:]) - //copy(out[dstEvery:], buf[1][:]) - //copy(out[dstEvery*2:], buf[2][:]) - *(*[bufoff]byte)(out) = buf[0] - *(*[bufoff]byte)(out[dstEvery:]) = buf[1] - *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2] - *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3] - out = out[bufoff:] - decoded += bufoff * 4 - } - } - if off > 0 { - ioff := int(off) - if len(out) < dstEvery*3+ioff { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 3") - } - copy(out, buf[0][:off]) - copy(out[dstEvery:], buf[1][:off]) - copy(out[dstEvery*2:], buf[2][:off]) - copy(out[dstEvery*3:], buf[3][:off]) - decoded += int(off) * 4 - out = out[off:] - } - - // Decode remaining. - // Decode remaining. - remainBytes := dstEvery - (decoded / 4) - for i := range br { - offset := dstEvery * i - endsAt := offset + remainBytes - if endsAt > len(out) { - endsAt = len(out) - } - br := &br[i] - bitsLeft := br.remaining() - for bitsLeft > 0 { - if br.finished() { - d.bufs.Put(buf) - return nil, io.ErrUnexpectedEOF - } - if br.bitsRead >= 56 { - if br.off >= 4 { - v := br.in[br.off-4:] - v = v[:4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - br.value |= uint64(low) << (br.bitsRead - 32) - br.bitsRead -= 32 - br.off -= 4 - } else { - for br.off > 0 { - br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) - br.bitsRead -= 8 - br.off-- - } - } - } - // end inline... - if offset >= endsAt { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 4") - } - - // Read value and increment offset. - v := single[uint8(br.value>>shift)].entry - nBits := uint8(v) - br.advance(nBits) - bitsLeft -= uint(nBits) - out[offset] = uint8(v >> 8) - offset++ - } - if offset != endsAt { - d.bufs.Put(buf) - return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) - } - decoded += offset - dstEvery*i - err = br.close() - if err != nil { - d.bufs.Put(buf) - return nil, err - } - } - d.bufs.Put(buf) - if dstSize != decoded { - return nil, errors.New("corruption detected: short output block") - } - return dst, nil -} - -// Decompress4X will decompress a 4X encoded stream. -// The length of the supplied input must match the end of a block exactly. -// The *capacity* of the dst slice must match the destination size of -// the uncompressed data exactly. -func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { - var br [4]bitReaderBytes - start := 6 - for i := 0; i < 3; i++ { - length := int(src[i*2]) | (int(src[i*2+1]) << 8) - if start+length >= len(src) { - return nil, errors.New("truncated input (or invalid offset)") - } - err := br[i].init(src[start : start+length]) - if err != nil { - return nil, err - } - start += length - } - err := br[3].init(src[start:]) - if err != nil { - return nil, err - } - - // destination, offset to match first output - dstSize := cap(dst) - dst = dst[:dstSize] - out := dst - dstEvery := (dstSize + 3) / 4 - - const shift = 56 - const tlSize = 1 << 8 - single := d.dt.single[:tlSize] - - // Use temp table to avoid bound checks/append penalty. - buf := d.buffer() - var off uint8 - var decoded int - - // Decode 4 values from each decoder/loop. - const bufoff = 256 - for { - if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { - break - } - - { - // Interleave 2 decodes. - const stream = 0 - const stream2 = 1 - br1 := &br[stream] - br2 := &br[stream2] - br1.fillFast() - br2.fillFast() - - v := single[uint8(br1.value>>shift)].entry - v2 := single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off] = uint8(v >> 8) - buf[stream2][off] = uint8(v2 >> 8) - - v = single[uint8(br1.value>>shift)].entry - v2 = single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off+1] = uint8(v >> 8) - buf[stream2][off+1] = uint8(v2 >> 8) - - v = single[uint8(br1.value>>shift)].entry - v2 = single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off+2] = uint8(v >> 8) - buf[stream2][off+2] = uint8(v2 >> 8) - - v = single[uint8(br1.value>>shift)].entry - v2 = single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off+3] = uint8(v >> 8) - buf[stream2][off+3] = uint8(v2 >> 8) - } - - { - const stream = 2 - const stream2 = 3 - br1 := &br[stream] - br2 := &br[stream2] - br1.fillFast() - br2.fillFast() - - v := single[uint8(br1.value>>shift)].entry - v2 := single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off] = uint8(v >> 8) - buf[stream2][off] = uint8(v2 >> 8) - - v = single[uint8(br1.value>>shift)].entry - v2 = single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off+1] = uint8(v >> 8) - buf[stream2][off+1] = uint8(v2 >> 8) - - v = single[uint8(br1.value>>shift)].entry - v2 = single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off+2] = uint8(v >> 8) - buf[stream2][off+2] = uint8(v2 >> 8) - - v = single[uint8(br1.value>>shift)].entry - v2 = single[uint8(br2.value>>shift)].entry - br1.bitsRead += uint8(v) - br1.value <<= v & 63 - br2.bitsRead += uint8(v2) - br2.value <<= v2 & 63 - buf[stream][off+3] = uint8(v >> 8) - buf[stream2][off+3] = uint8(v2 >> 8) - } - - off += 4 - - if off == 0 { - if bufoff > dstEvery { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 1") - } - // There must at least be 3 buffers left. - if len(out)-bufoff < dstEvery*3 { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 2") - } - - //copy(out, buf[0][:]) - //copy(out[dstEvery:], buf[1][:]) - //copy(out[dstEvery*2:], buf[2][:]) - // copy(out[dstEvery*3:], buf[3][:]) - *(*[bufoff]byte)(out) = buf[0] - *(*[bufoff]byte)(out[dstEvery:]) = buf[1] - *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2] - *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3] - out = out[bufoff:] - decoded += bufoff * 4 - } - } - if off > 0 { - ioff := int(off) - if len(out) < dstEvery*3+ioff { - return nil, errors.New("corruption detected: stream overrun 3") - } - copy(out, buf[0][:off]) - copy(out[dstEvery:], buf[1][:off]) - copy(out[dstEvery*2:], buf[2][:off]) - copy(out[dstEvery*3:], buf[3][:off]) - decoded += int(off) * 4 - out = out[off:] - } - - // Decode remaining. - remainBytes := dstEvery - (decoded / 4) - for i := range br { - offset := dstEvery * i - endsAt := offset + remainBytes - if endsAt > len(out) { - endsAt = len(out) - } - br := &br[i] - bitsLeft := br.remaining() - for bitsLeft > 0 { - if br.finished() { - d.bufs.Put(buf) - return nil, io.ErrUnexpectedEOF - } - if br.bitsRead >= 56 { - if br.off >= 4 { - v := br.in[br.off-4:] - v = v[:4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - br.value |= uint64(low) << (br.bitsRead - 32) - br.bitsRead -= 32 - br.off -= 4 - } else { - for br.off > 0 { - br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) - br.bitsRead -= 8 - br.off-- - } - } - } - // end inline... - if offset >= endsAt { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 4") - } - - // Read value and increment offset. - v := single[br.peekByteFast()].entry - nBits := uint8(v) - br.advance(nBits) - bitsLeft -= uint(nBits) - out[offset] = uint8(v >> 8) - offset++ - } - if offset != endsAt { - d.bufs.Put(buf) - return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) - } - - decoded += offset - dstEvery*i - err = br.close() - if err != nil { - d.bufs.Put(buf) - return nil, err - } - } - d.bufs.Put(buf) - if dstSize != decoded { - return nil, errors.New("corruption detected: short output block") - } - return dst, nil -} - -// matches will compare a decoding table to a coding table. -// Errors are written to the writer. -// Nothing will be written if table is ok. -func (s *Scratch) matches(ct cTable, w io.Writer) { - if s == nil || len(s.dt.single) == 0 { - return - } - dt := s.dt.single[:1<>8) == byte(sym) { - fmt.Fprintf(w, "symbol %x has decoder, but no encoder\n", sym) - errs++ - break - } - } - if errs == 0 { - broken-- - } - continue - } - // Unused bits in input - ub := tablelog - enc.nBits - top := enc.val << ub - // decoder looks at top bits. - dec := dt[top] - if uint8(dec.entry) != enc.nBits { - fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", sym, enc.nBits, uint8(dec.entry)) - errs++ - } - if uint8(dec.entry>>8) != uint8(sym) { - fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", sym, sym, uint8(dec.entry>>8)) - errs++ - } - if errs > 0 { - fmt.Fprintf(w, "%d errors in base, stopping\n", errs) - continue - } - // Ensure that all combinations are covered. - for i := uint16(0); i < (1 << ub); i++ { - vval := top | i - dec := dt[vval] - if uint8(dec.entry) != enc.nBits { - fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", vval, enc.nBits, uint8(dec.entry)) - errs++ - } - if uint8(dec.entry>>8) != uint8(sym) { - fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", vval, sym, uint8(dec.entry>>8)) - errs++ - } - if errs > 20 { - fmt.Fprintf(w, "%d errors, stopping\n", errs) - break - } - } - if errs == 0 { - ok++ - broken-- - } - } - if broken > 0 { - fmt.Fprintf(w, "%d broken, %d ok\n", broken, ok) - } -} diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go deleted file mode 100644 index ba7e8e6b0..000000000 --- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go +++ /dev/null @@ -1,226 +0,0 @@ -//go:build amd64 && !appengine && !noasm && gc -// +build amd64,!appengine,!noasm,gc - -// This file contains the specialisation of Decoder.Decompress4X -// and Decoder.Decompress1X that use an asm implementation of thir main loops. -package huff0 - -import ( - "errors" - "fmt" - - "github.com/klauspost/compress/internal/cpuinfo" -) - -// decompress4x_main_loop_x86 is an x86 assembler implementation -// of Decompress4X when tablelog > 8. -// -//go:noescape -func decompress4x_main_loop_amd64(ctx *decompress4xContext) - -// decompress4x_8b_loop_x86 is an x86 assembler implementation -// of Decompress4X when tablelog <= 8 which decodes 4 entries -// per loop. -// -//go:noescape -func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) - -// fallback8BitSize is the size where using Go version is faster. -const fallback8BitSize = 800 - -type decompress4xContext struct { - pbr *[4]bitReaderShifted - peekBits uint8 - out *byte - dstEvery int - tbl *dEntrySingle - decoded int - limit *byte -} - -// Decompress4X will decompress a 4X encoded stream. -// The length of the supplied input must match the end of a block exactly. -// The *capacity* of the dst slice must match the destination size of -// the uncompressed data exactly. -func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { - if len(d.dt.single) == 0 { - return nil, errors.New("no table loaded") - } - if len(src) < 6+(4*1) { - return nil, errors.New("input too small") - } - - use8BitTables := d.actualTableLog <= 8 - if cap(dst) < fallback8BitSize && use8BitTables { - return d.decompress4X8bit(dst, src) - } - - var br [4]bitReaderShifted - // Decode "jump table" - start := 6 - for i := 0; i < 3; i++ { - length := int(src[i*2]) | (int(src[i*2+1]) << 8) - if start+length >= len(src) { - return nil, errors.New("truncated input (or invalid offset)") - } - err := br[i].init(src[start : start+length]) - if err != nil { - return nil, err - } - start += length - } - err := br[3].init(src[start:]) - if err != nil { - return nil, err - } - - // destination, offset to match first output - dstSize := cap(dst) - dst = dst[:dstSize] - out := dst - dstEvery := (dstSize + 3) / 4 - - const tlSize = 1 << tableLogMax - const tlMask = tlSize - 1 - single := d.dt.single[:tlSize] - - var decoded int - - if len(out) > 4*4 && !(br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4) { - ctx := decompress4xContext{ - pbr: &br, - peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast() - out: &out[0], - dstEvery: dstEvery, - tbl: &single[0], - limit: &out[dstEvery-4], // Always stop decoding when first buffer gets here to avoid writing OOB on last. - } - if use8BitTables { - decompress4x_8b_main_loop_amd64(&ctx) - } else { - decompress4x_main_loop_amd64(&ctx) - } - - decoded = ctx.decoded - out = out[decoded/4:] - } - - // Decode remaining. - remainBytes := dstEvery - (decoded / 4) - for i := range br { - offset := dstEvery * i - endsAt := offset + remainBytes - if endsAt > len(out) { - endsAt = len(out) - } - br := &br[i] - bitsLeft := br.remaining() - for bitsLeft > 0 { - br.fill() - if offset >= endsAt { - return nil, errors.New("corruption detected: stream overrun 4") - } - - // Read value and increment offset. - val := br.peekBitsFast(d.actualTableLog) - v := single[val&tlMask].entry - nBits := uint8(v) - br.advance(nBits) - bitsLeft -= uint(nBits) - out[offset] = uint8(v >> 8) - offset++ - } - if offset != endsAt { - return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) - } - decoded += offset - dstEvery*i - err = br.close() - if err != nil { - return nil, err - } - } - if dstSize != decoded { - return nil, errors.New("corruption detected: short output block") - } - return dst, nil -} - -// decompress4x_main_loop_x86 is an x86 assembler implementation -// of Decompress1X when tablelog > 8. -// -//go:noescape -func decompress1x_main_loop_amd64(ctx *decompress1xContext) - -// decompress4x_main_loop_x86 is an x86 with BMI2 assembler implementation -// of Decompress1X when tablelog > 8. -// -//go:noescape -func decompress1x_main_loop_bmi2(ctx *decompress1xContext) - -type decompress1xContext struct { - pbr *bitReaderShifted - peekBits uint8 - out *byte - outCap int - tbl *dEntrySingle - decoded int -} - -// Error reported by asm implementations -const error_max_decoded_size_exeeded = -1 - -// Decompress1X will decompress a 1X encoded stream. -// The cap of the output buffer will be the maximum decompressed size. -// The length of the supplied input must match the end of a block exactly. -func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { - if len(d.dt.single) == 0 { - return nil, errors.New("no table loaded") - } - var br bitReaderShifted - err := br.init(src) - if err != nil { - return dst, err - } - maxDecodedSize := cap(dst) - dst = dst[:maxDecodedSize] - - const tlSize = 1 << tableLogMax - const tlMask = tlSize - 1 - - if maxDecodedSize >= 4 { - ctx := decompress1xContext{ - pbr: &br, - out: &dst[0], - outCap: maxDecodedSize, - peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast() - tbl: &d.dt.single[0], - } - - if cpuinfo.HasBMI2() { - decompress1x_main_loop_bmi2(&ctx) - } else { - decompress1x_main_loop_amd64(&ctx) - } - if ctx.decoded == error_max_decoded_size_exeeded { - return nil, ErrMaxDecodedSizeExceeded - } - - dst = dst[:ctx.decoded] - } - - // br < 8, so uint8 is fine - bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead - for bitsLeft > 0 { - br.fill() - if len(dst) >= maxDecodedSize { - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask] - nBits := uint8(v.entry) - br.advance(nBits) - bitsLeft -= nBits - dst = append(dst, uint8(v.entry>>8)) - } - return dst, br.close() -} diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s deleted file mode 100644 index c4c7ab2d1..000000000 --- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s +++ /dev/null @@ -1,830 +0,0 @@ -// Code generated by command: go run gen.go -out ../decompress_amd64.s -pkg=huff0. DO NOT EDIT. - -//go:build amd64 && !appengine && !noasm && gc - -// func decompress4x_main_loop_amd64(ctx *decompress4xContext) -TEXT ·decompress4x_main_loop_amd64(SB), $0-8 - // Preload values - MOVQ ctx+0(FP), AX - MOVBQZX 8(AX), DI - MOVQ 16(AX), BX - MOVQ 48(AX), SI - MOVQ 24(AX), R8 - MOVQ 32(AX), R9 - MOVQ (AX), R10 - - // Main loop -main_loop: - XORL DX, DX - CMPQ BX, SI - SETGE DL - - // br0.fillFast32() - MOVQ 32(R10), R11 - MOVBQZX 40(R10), R12 - CMPQ R12, $0x20 - JBE skip_fill0 - MOVQ 24(R10), AX - SUBQ $0x20, R12 - SUBQ $0x04, AX - MOVQ (R10), R13 - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (AX)(R13*1), R13 - MOVQ R12, CX - SHLQ CL, R13 - MOVQ AX, 24(R10) - ORQ R13, R11 - - // exhausted += (br0.off < 4) - CMPQ AX, $0x04 - ADCB $+0, DL - -skip_fill0: - // val0 := br0.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v0 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br0.advance(uint8(v0.entry) - MOVB CH, AL - SHLQ CL, R11 - ADDB CL, R12 - - // val1 := br0.peekTopBits(peekBits) - MOVQ DI, CX - MOVQ R11, R13 - SHRQ CL, R13 - - // v1 := table[val1&mask] - MOVW (R9)(R13*2), CX - - // br0.advance(uint8(v1.entry)) - MOVB CH, AH - SHLQ CL, R11 - ADDB CL, R12 - - // these two writes get coalesced - // out[id * dstEvery + 0] = uint8(v0.entry >> 8) - // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - MOVW AX, (BX) - - // update the bitreader structure - MOVQ R11, 32(R10) - MOVB R12, 40(R10) - - // br1.fillFast32() - MOVQ 80(R10), R11 - MOVBQZX 88(R10), R12 - CMPQ R12, $0x20 - JBE skip_fill1 - MOVQ 72(R10), AX - SUBQ $0x20, R12 - SUBQ $0x04, AX - MOVQ 48(R10), R13 - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (AX)(R13*1), R13 - MOVQ R12, CX - SHLQ CL, R13 - MOVQ AX, 72(R10) - ORQ R13, R11 - - // exhausted += (br1.off < 4) - CMPQ AX, $0x04 - ADCB $+0, DL - -skip_fill1: - // val0 := br1.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v0 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br1.advance(uint8(v0.entry) - MOVB CH, AL - SHLQ CL, R11 - ADDB CL, R12 - - // val1 := br1.peekTopBits(peekBits) - MOVQ DI, CX - MOVQ R11, R13 - SHRQ CL, R13 - - // v1 := table[val1&mask] - MOVW (R9)(R13*2), CX - - // br1.advance(uint8(v1.entry)) - MOVB CH, AH - SHLQ CL, R11 - ADDB CL, R12 - - // these two writes get coalesced - // out[id * dstEvery + 0] = uint8(v0.entry >> 8) - // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - MOVW AX, (BX)(R8*1) - - // update the bitreader structure - MOVQ R11, 80(R10) - MOVB R12, 88(R10) - - // br2.fillFast32() - MOVQ 128(R10), R11 - MOVBQZX 136(R10), R12 - CMPQ R12, $0x20 - JBE skip_fill2 - MOVQ 120(R10), AX - SUBQ $0x20, R12 - SUBQ $0x04, AX - MOVQ 96(R10), R13 - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (AX)(R13*1), R13 - MOVQ R12, CX - SHLQ CL, R13 - MOVQ AX, 120(R10) - ORQ R13, R11 - - // exhausted += (br2.off < 4) - CMPQ AX, $0x04 - ADCB $+0, DL - -skip_fill2: - // val0 := br2.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v0 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br2.advance(uint8(v0.entry) - MOVB CH, AL - SHLQ CL, R11 - ADDB CL, R12 - - // val1 := br2.peekTopBits(peekBits) - MOVQ DI, CX - MOVQ R11, R13 - SHRQ CL, R13 - - // v1 := table[val1&mask] - MOVW (R9)(R13*2), CX - - // br2.advance(uint8(v1.entry)) - MOVB CH, AH - SHLQ CL, R11 - ADDB CL, R12 - - // these two writes get coalesced - // out[id * dstEvery + 0] = uint8(v0.entry >> 8) - // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - MOVW AX, (BX)(R8*2) - - // update the bitreader structure - MOVQ R11, 128(R10) - MOVB R12, 136(R10) - - // br3.fillFast32() - MOVQ 176(R10), R11 - MOVBQZX 184(R10), R12 - CMPQ R12, $0x20 - JBE skip_fill3 - MOVQ 168(R10), AX - SUBQ $0x20, R12 - SUBQ $0x04, AX - MOVQ 144(R10), R13 - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (AX)(R13*1), R13 - MOVQ R12, CX - SHLQ CL, R13 - MOVQ AX, 168(R10) - ORQ R13, R11 - - // exhausted += (br3.off < 4) - CMPQ AX, $0x04 - ADCB $+0, DL - -skip_fill3: - // val0 := br3.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v0 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br3.advance(uint8(v0.entry) - MOVB CH, AL - SHLQ CL, R11 - ADDB CL, R12 - - // val1 := br3.peekTopBits(peekBits) - MOVQ DI, CX - MOVQ R11, R13 - SHRQ CL, R13 - - // v1 := table[val1&mask] - MOVW (R9)(R13*2), CX - - // br3.advance(uint8(v1.entry)) - MOVB CH, AH - SHLQ CL, R11 - ADDB CL, R12 - - // these two writes get coalesced - // out[id * dstEvery + 0] = uint8(v0.entry >> 8) - // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - LEAQ (R8)(R8*2), CX - MOVW AX, (BX)(CX*1) - - // update the bitreader structure - MOVQ R11, 176(R10) - MOVB R12, 184(R10) - ADDQ $0x02, BX - TESTB DL, DL - JZ main_loop - MOVQ ctx+0(FP), AX - SUBQ 16(AX), BX - SHLQ $0x02, BX - MOVQ BX, 40(AX) - RET - -// func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) -TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8 - // Preload values - MOVQ ctx+0(FP), CX - MOVBQZX 8(CX), DI - MOVQ 16(CX), BX - MOVQ 48(CX), SI - MOVQ 24(CX), R8 - MOVQ 32(CX), R9 - MOVQ (CX), R10 - - // Main loop -main_loop: - XORL DX, DX - CMPQ BX, SI - SETGE DL - - // br0.fillFast32() - MOVQ 32(R10), R11 - MOVBQZX 40(R10), R12 - CMPQ R12, $0x20 - JBE skip_fill0 - MOVQ 24(R10), R13 - SUBQ $0x20, R12 - SUBQ $0x04, R13 - MOVQ (R10), R14 - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (R13)(R14*1), R14 - MOVQ R12, CX - SHLQ CL, R14 - MOVQ R13, 24(R10) - ORQ R14, R11 - - // exhausted += (br0.off < 4) - CMPQ R13, $0x04 - ADCB $+0, DL - -skip_fill0: - // val0 := br0.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v0 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br0.advance(uint8(v0.entry) - MOVB CH, AL - SHLQ CL, R11 - ADDB CL, R12 - - // val1 := br0.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v1 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br0.advance(uint8(v1.entry) - MOVB CH, AH - SHLQ CL, R11 - ADDB CL, R12 - BSWAPL AX - - // val2 := br0.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v2 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br0.advance(uint8(v2.entry) - MOVB CH, AH - SHLQ CL, R11 - ADDB CL, R12 - - // val3 := br0.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v3 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br0.advance(uint8(v3.entry) - MOVB CH, AL - SHLQ CL, R11 - ADDB CL, R12 - BSWAPL AX - - // these four writes get coalesced - // out[id * dstEvery + 0] = uint8(v0.entry >> 8) - // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - // out[id * dstEvery + 3] = uint8(v2.entry >> 8) - // out[id * dstEvery + 4] = uint8(v3.entry >> 8) - MOVL AX, (BX) - - // update the bitreader structure - MOVQ R11, 32(R10) - MOVB R12, 40(R10) - - // br1.fillFast32() - MOVQ 80(R10), R11 - MOVBQZX 88(R10), R12 - CMPQ R12, $0x20 - JBE skip_fill1 - MOVQ 72(R10), R13 - SUBQ $0x20, R12 - SUBQ $0x04, R13 - MOVQ 48(R10), R14 - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (R13)(R14*1), R14 - MOVQ R12, CX - SHLQ CL, R14 - MOVQ R13, 72(R10) - ORQ R14, R11 - - // exhausted += (br1.off < 4) - CMPQ R13, $0x04 - ADCB $+0, DL - -skip_fill1: - // val0 := br1.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v0 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br1.advance(uint8(v0.entry) - MOVB CH, AL - SHLQ CL, R11 - ADDB CL, R12 - - // val1 := br1.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v1 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br1.advance(uint8(v1.entry) - MOVB CH, AH - SHLQ CL, R11 - ADDB CL, R12 - BSWAPL AX - - // val2 := br1.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v2 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br1.advance(uint8(v2.entry) - MOVB CH, AH - SHLQ CL, R11 - ADDB CL, R12 - - // val3 := br1.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v3 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br1.advance(uint8(v3.entry) - MOVB CH, AL - SHLQ CL, R11 - ADDB CL, R12 - BSWAPL AX - - // these four writes get coalesced - // out[id * dstEvery + 0] = uint8(v0.entry >> 8) - // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - // out[id * dstEvery + 3] = uint8(v2.entry >> 8) - // out[id * dstEvery + 4] = uint8(v3.entry >> 8) - MOVL AX, (BX)(R8*1) - - // update the bitreader structure - MOVQ R11, 80(R10) - MOVB R12, 88(R10) - - // br2.fillFast32() - MOVQ 128(R10), R11 - MOVBQZX 136(R10), R12 - CMPQ R12, $0x20 - JBE skip_fill2 - MOVQ 120(R10), R13 - SUBQ $0x20, R12 - SUBQ $0x04, R13 - MOVQ 96(R10), R14 - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (R13)(R14*1), R14 - MOVQ R12, CX - SHLQ CL, R14 - MOVQ R13, 120(R10) - ORQ R14, R11 - - // exhausted += (br2.off < 4) - CMPQ R13, $0x04 - ADCB $+0, DL - -skip_fill2: - // val0 := br2.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v0 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br2.advance(uint8(v0.entry) - MOVB CH, AL - SHLQ CL, R11 - ADDB CL, R12 - - // val1 := br2.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v1 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br2.advance(uint8(v1.entry) - MOVB CH, AH - SHLQ CL, R11 - ADDB CL, R12 - BSWAPL AX - - // val2 := br2.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v2 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br2.advance(uint8(v2.entry) - MOVB CH, AH - SHLQ CL, R11 - ADDB CL, R12 - - // val3 := br2.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v3 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br2.advance(uint8(v3.entry) - MOVB CH, AL - SHLQ CL, R11 - ADDB CL, R12 - BSWAPL AX - - // these four writes get coalesced - // out[id * dstEvery + 0] = uint8(v0.entry >> 8) - // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - // out[id * dstEvery + 3] = uint8(v2.entry >> 8) - // out[id * dstEvery + 4] = uint8(v3.entry >> 8) - MOVL AX, (BX)(R8*2) - - // update the bitreader structure - MOVQ R11, 128(R10) - MOVB R12, 136(R10) - - // br3.fillFast32() - MOVQ 176(R10), R11 - MOVBQZX 184(R10), R12 - CMPQ R12, $0x20 - JBE skip_fill3 - MOVQ 168(R10), R13 - SUBQ $0x20, R12 - SUBQ $0x04, R13 - MOVQ 144(R10), R14 - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (R13)(R14*1), R14 - MOVQ R12, CX - SHLQ CL, R14 - MOVQ R13, 168(R10) - ORQ R14, R11 - - // exhausted += (br3.off < 4) - CMPQ R13, $0x04 - ADCB $+0, DL - -skip_fill3: - // val0 := br3.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v0 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br3.advance(uint8(v0.entry) - MOVB CH, AL - SHLQ CL, R11 - ADDB CL, R12 - - // val1 := br3.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v1 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br3.advance(uint8(v1.entry) - MOVB CH, AH - SHLQ CL, R11 - ADDB CL, R12 - BSWAPL AX - - // val2 := br3.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v2 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br3.advance(uint8(v2.entry) - MOVB CH, AH - SHLQ CL, R11 - ADDB CL, R12 - - // val3 := br3.peekTopBits(peekBits) - MOVQ R11, R13 - MOVQ DI, CX - SHRQ CL, R13 - - // v3 := table[val0&mask] - MOVW (R9)(R13*2), CX - - // br3.advance(uint8(v3.entry) - MOVB CH, AL - SHLQ CL, R11 - ADDB CL, R12 - BSWAPL AX - - // these four writes get coalesced - // out[id * dstEvery + 0] = uint8(v0.entry >> 8) - // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - // out[id * dstEvery + 3] = uint8(v2.entry >> 8) - // out[id * dstEvery + 4] = uint8(v3.entry >> 8) - LEAQ (R8)(R8*2), CX - MOVL AX, (BX)(CX*1) - - // update the bitreader structure - MOVQ R11, 176(R10) - MOVB R12, 184(R10) - ADDQ $0x04, BX - TESTB DL, DL - JZ main_loop - MOVQ ctx+0(FP), AX - SUBQ 16(AX), BX - SHLQ $0x02, BX - MOVQ BX, 40(AX) - RET - -// func decompress1x_main_loop_amd64(ctx *decompress1xContext) -TEXT ·decompress1x_main_loop_amd64(SB), $0-8 - MOVQ ctx+0(FP), CX - MOVQ 16(CX), DX - MOVQ 24(CX), BX - CMPQ BX, $0x04 - JB error_max_decoded_size_exceeded - LEAQ (DX)(BX*1), BX - MOVQ (CX), SI - MOVQ (SI), R8 - MOVQ 24(SI), R9 - MOVQ 32(SI), R10 - MOVBQZX 40(SI), R11 - MOVQ 32(CX), SI - MOVBQZX 8(CX), DI - JMP loop_condition - -main_loop: - // Check if we have room for 4 bytes in the output buffer - LEAQ 4(DX), CX - CMPQ CX, BX - JGE error_max_decoded_size_exceeded - - // Decode 4 values - CMPQ R11, $0x20 - JL bitReader_fillFast_1_end - SUBQ $0x20, R11 - SUBQ $0x04, R9 - MOVL (R8)(R9*1), R12 - MOVQ R11, CX - SHLQ CL, R12 - ORQ R12, R10 - -bitReader_fillFast_1_end: - MOVQ DI, CX - MOVQ R10, R12 - SHRQ CL, R12 - MOVW (SI)(R12*2), CX - MOVB CH, AL - MOVBQZX CL, CX - ADDQ CX, R11 - SHLQ CL, R10 - MOVQ DI, CX - MOVQ R10, R12 - SHRQ CL, R12 - MOVW (SI)(R12*2), CX - MOVB CH, AH - MOVBQZX CL, CX - ADDQ CX, R11 - SHLQ CL, R10 - BSWAPL AX - CMPQ R11, $0x20 - JL bitReader_fillFast_2_end - SUBQ $0x20, R11 - SUBQ $0x04, R9 - MOVL (R8)(R9*1), R12 - MOVQ R11, CX - SHLQ CL, R12 - ORQ R12, R10 - -bitReader_fillFast_2_end: - MOVQ DI, CX - MOVQ R10, R12 - SHRQ CL, R12 - MOVW (SI)(R12*2), CX - MOVB CH, AH - MOVBQZX CL, CX - ADDQ CX, R11 - SHLQ CL, R10 - MOVQ DI, CX - MOVQ R10, R12 - SHRQ CL, R12 - MOVW (SI)(R12*2), CX - MOVB CH, AL - MOVBQZX CL, CX - ADDQ CX, R11 - SHLQ CL, R10 - BSWAPL AX - - // Store the decoded values - MOVL AX, (DX) - ADDQ $0x04, DX - -loop_condition: - CMPQ R9, $0x08 - JGE main_loop - - // Update ctx structure - MOVQ ctx+0(FP), AX - SUBQ 16(AX), DX - MOVQ DX, 40(AX) - MOVQ (AX), AX - MOVQ R9, 24(AX) - MOVQ R10, 32(AX) - MOVB R11, 40(AX) - RET - - // Report error -error_max_decoded_size_exceeded: - MOVQ ctx+0(FP), AX - MOVQ $-1, CX - MOVQ CX, 40(AX) - RET - -// func decompress1x_main_loop_bmi2(ctx *decompress1xContext) -// Requires: BMI2 -TEXT ·decompress1x_main_loop_bmi2(SB), $0-8 - MOVQ ctx+0(FP), CX - MOVQ 16(CX), DX - MOVQ 24(CX), BX - CMPQ BX, $0x04 - JB error_max_decoded_size_exceeded - LEAQ (DX)(BX*1), BX - MOVQ (CX), SI - MOVQ (SI), R8 - MOVQ 24(SI), R9 - MOVQ 32(SI), R10 - MOVBQZX 40(SI), R11 - MOVQ 32(CX), SI - MOVBQZX 8(CX), DI - JMP loop_condition - -main_loop: - // Check if we have room for 4 bytes in the output buffer - LEAQ 4(DX), CX - CMPQ CX, BX - JGE error_max_decoded_size_exceeded - - // Decode 4 values - CMPQ R11, $0x20 - JL bitReader_fillFast_1_end - SUBQ $0x20, R11 - SUBQ $0x04, R9 - MOVL (R8)(R9*1), CX - SHLXQ R11, CX, CX - ORQ CX, R10 - -bitReader_fillFast_1_end: - SHRXQ DI, R10, CX - MOVW (SI)(CX*2), CX - MOVB CH, AL - MOVBQZX CL, CX - ADDQ CX, R11 - SHLXQ CX, R10, R10 - SHRXQ DI, R10, CX - MOVW (SI)(CX*2), CX - MOVB CH, AH - MOVBQZX CL, CX - ADDQ CX, R11 - SHLXQ CX, R10, R10 - BSWAPL AX - CMPQ R11, $0x20 - JL bitReader_fillFast_2_end - SUBQ $0x20, R11 - SUBQ $0x04, R9 - MOVL (R8)(R9*1), CX - SHLXQ R11, CX, CX - ORQ CX, R10 - -bitReader_fillFast_2_end: - SHRXQ DI, R10, CX - MOVW (SI)(CX*2), CX - MOVB CH, AH - MOVBQZX CL, CX - ADDQ CX, R11 - SHLXQ CX, R10, R10 - SHRXQ DI, R10, CX - MOVW (SI)(CX*2), CX - MOVB CH, AL - MOVBQZX CL, CX - ADDQ CX, R11 - SHLXQ CX, R10, R10 - BSWAPL AX - - // Store the decoded values - MOVL AX, (DX) - ADDQ $0x04, DX - -loop_condition: - CMPQ R9, $0x08 - JGE main_loop - - // Update ctx structure - MOVQ ctx+0(FP), AX - SUBQ 16(AX), DX - MOVQ DX, 40(AX) - MOVQ (AX), AX - MOVQ R9, 24(AX) - MOVQ R10, 32(AX) - MOVB R11, 40(AX) - RET - - // Report error -error_max_decoded_size_exceeded: - MOVQ ctx+0(FP), AX - MOVQ $-1, CX - MOVQ CX, 40(AX) - RET diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_generic.go b/vendor/github.com/klauspost/compress/huff0/decompress_generic.go deleted file mode 100644 index 908c17de6..000000000 --- a/vendor/github.com/klauspost/compress/huff0/decompress_generic.go +++ /dev/null @@ -1,299 +0,0 @@ -//go:build !amd64 || appengine || !gc || noasm -// +build !amd64 appengine !gc noasm - -// This file contains a generic implementation of Decoder.Decompress4X. -package huff0 - -import ( - "errors" - "fmt" -) - -// Decompress4X will decompress a 4X encoded stream. -// The length of the supplied input must match the end of a block exactly. -// The *capacity* of the dst slice must match the destination size of -// the uncompressed data exactly. -func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { - if len(d.dt.single) == 0 { - return nil, errors.New("no table loaded") - } - if len(src) < 6+(4*1) { - return nil, errors.New("input too small") - } - if use8BitTables && d.actualTableLog <= 8 { - return d.decompress4X8bit(dst, src) - } - - var br [4]bitReaderShifted - // Decode "jump table" - start := 6 - for i := 0; i < 3; i++ { - length := int(src[i*2]) | (int(src[i*2+1]) << 8) - if start+length >= len(src) { - return nil, errors.New("truncated input (or invalid offset)") - } - err := br[i].init(src[start : start+length]) - if err != nil { - return nil, err - } - start += length - } - err := br[3].init(src[start:]) - if err != nil { - return nil, err - } - - // destination, offset to match first output - dstSize := cap(dst) - dst = dst[:dstSize] - out := dst - dstEvery := (dstSize + 3) / 4 - - const tlSize = 1 << tableLogMax - const tlMask = tlSize - 1 - single := d.dt.single[:tlSize] - - // Use temp table to avoid bound checks/append penalty. - buf := d.buffer() - var off uint8 - var decoded int - - // Decode 2 values from each decoder/loop. - const bufoff = 256 - for { - if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { - break - } - - { - const stream = 0 - const stream2 = 1 - br[stream].fillFast() - br[stream2].fillFast() - - val := br[stream].peekBitsFast(d.actualTableLog) - val2 := br[stream2].peekBitsFast(d.actualTableLog) - v := single[val&tlMask] - v2 := single[val2&tlMask] - br[stream].advance(uint8(v.entry)) - br[stream2].advance(uint8(v2.entry)) - buf[stream][off] = uint8(v.entry >> 8) - buf[stream2][off] = uint8(v2.entry >> 8) - - val = br[stream].peekBitsFast(d.actualTableLog) - val2 = br[stream2].peekBitsFast(d.actualTableLog) - v = single[val&tlMask] - v2 = single[val2&tlMask] - br[stream].advance(uint8(v.entry)) - br[stream2].advance(uint8(v2.entry)) - buf[stream][off+1] = uint8(v.entry >> 8) - buf[stream2][off+1] = uint8(v2.entry >> 8) - } - - { - const stream = 2 - const stream2 = 3 - br[stream].fillFast() - br[stream2].fillFast() - - val := br[stream].peekBitsFast(d.actualTableLog) - val2 := br[stream2].peekBitsFast(d.actualTableLog) - v := single[val&tlMask] - v2 := single[val2&tlMask] - br[stream].advance(uint8(v.entry)) - br[stream2].advance(uint8(v2.entry)) - buf[stream][off] = uint8(v.entry >> 8) - buf[stream2][off] = uint8(v2.entry >> 8) - - val = br[stream].peekBitsFast(d.actualTableLog) - val2 = br[stream2].peekBitsFast(d.actualTableLog) - v = single[val&tlMask] - v2 = single[val2&tlMask] - br[stream].advance(uint8(v.entry)) - br[stream2].advance(uint8(v2.entry)) - buf[stream][off+1] = uint8(v.entry >> 8) - buf[stream2][off+1] = uint8(v2.entry >> 8) - } - - off += 2 - - if off == 0 { - if bufoff > dstEvery { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 1") - } - // There must at least be 3 buffers left. - if len(out)-bufoff < dstEvery*3 { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 2") - } - //copy(out, buf[0][:]) - //copy(out[dstEvery:], buf[1][:]) - //copy(out[dstEvery*2:], buf[2][:]) - //copy(out[dstEvery*3:], buf[3][:]) - *(*[bufoff]byte)(out) = buf[0] - *(*[bufoff]byte)(out[dstEvery:]) = buf[1] - *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2] - *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3] - out = out[bufoff:] - decoded += bufoff * 4 - } - } - if off > 0 { - ioff := int(off) - if len(out) < dstEvery*3+ioff { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 3") - } - copy(out, buf[0][:off]) - copy(out[dstEvery:], buf[1][:off]) - copy(out[dstEvery*2:], buf[2][:off]) - copy(out[dstEvery*3:], buf[3][:off]) - decoded += int(off) * 4 - out = out[off:] - } - - // Decode remaining. - remainBytes := dstEvery - (decoded / 4) - for i := range br { - offset := dstEvery * i - endsAt := offset + remainBytes - if endsAt > len(out) { - endsAt = len(out) - } - br := &br[i] - bitsLeft := br.remaining() - for bitsLeft > 0 { - br.fill() - if offset >= endsAt { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 4") - } - - // Read value and increment offset. - val := br.peekBitsFast(d.actualTableLog) - v := single[val&tlMask].entry - nBits := uint8(v) - br.advance(nBits) - bitsLeft -= uint(nBits) - out[offset] = uint8(v >> 8) - offset++ - } - if offset != endsAt { - d.bufs.Put(buf) - return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) - } - decoded += offset - dstEvery*i - err = br.close() - if err != nil { - return nil, err - } - } - d.bufs.Put(buf) - if dstSize != decoded { - return nil, errors.New("corruption detected: short output block") - } - return dst, nil -} - -// Decompress1X will decompress a 1X encoded stream. -// The cap of the output buffer will be the maximum decompressed size. -// The length of the supplied input must match the end of a block exactly. -func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { - if len(d.dt.single) == 0 { - return nil, errors.New("no table loaded") - } - if use8BitTables && d.actualTableLog <= 8 { - return d.decompress1X8Bit(dst, src) - } - var br bitReaderShifted - err := br.init(src) - if err != nil { - return dst, err - } - maxDecodedSize := cap(dst) - dst = dst[:0] - - // Avoid bounds check by always having full sized table. - const tlSize = 1 << tableLogMax - const tlMask = tlSize - 1 - dt := d.dt.single[:tlSize] - - // Use temp table to avoid bound checks/append penalty. - bufs := d.buffer() - buf := &bufs[0] - var off uint8 - - for br.off >= 8 { - br.fillFast() - v := dt[br.peekBitsFast(d.actualTableLog)&tlMask] - br.advance(uint8(v.entry)) - buf[off+0] = uint8(v.entry >> 8) - - v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] - br.advance(uint8(v.entry)) - buf[off+1] = uint8(v.entry >> 8) - - // Refill - br.fillFast() - - v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] - br.advance(uint8(v.entry)) - buf[off+2] = uint8(v.entry >> 8) - - v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] - br.advance(uint8(v.entry)) - buf[off+3] = uint8(v.entry >> 8) - - off += 4 - if off == 0 { - if len(dst)+256 > maxDecodedSize { - br.close() - d.bufs.Put(bufs) - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:]...) - } - } - - if len(dst)+int(off) > maxDecodedSize { - d.bufs.Put(bufs) - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:off]...) - - // br < 8, so uint8 is fine - bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead - for bitsLeft > 0 { - br.fill() - if false && br.bitsRead >= 32 { - if br.off >= 4 { - v := br.in[br.off-4:] - v = v[:4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - br.value = (br.value << 32) | uint64(low) - br.bitsRead -= 32 - br.off -= 4 - } else { - for br.off > 0 { - br.value = (br.value << 8) | uint64(br.in[br.off-1]) - br.bitsRead -= 8 - br.off-- - } - } - } - if len(dst) >= maxDecodedSize { - d.bufs.Put(bufs) - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask] - nBits := uint8(v.entry) - br.advance(nBits) - bitsLeft -= nBits - dst = append(dst, uint8(v.entry>>8)) - } - d.bufs.Put(bufs) - return dst, br.close() -} diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go deleted file mode 100644 index 77ecd68e0..000000000 --- a/vendor/github.com/klauspost/compress/huff0/huff0.go +++ /dev/null @@ -1,337 +0,0 @@ -// Package huff0 provides fast huffman encoding as used in zstd. -// -// See README.md at https://github.com/klauspost/compress/tree/master/huff0 for details. -package huff0 - -import ( - "errors" - "fmt" - "math" - "math/bits" - "sync" - - "github.com/klauspost/compress/fse" -) - -const ( - maxSymbolValue = 255 - - // zstandard limits tablelog to 11, see: - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#huffman-tree-description - tableLogMax = 11 - tableLogDefault = 11 - minTablelog = 5 - huffNodesLen = 512 - - // BlockSizeMax is maximum input size for a single block uncompressed. - BlockSizeMax = 1<<18 - 1 -) - -var ( - // ErrIncompressible is returned when input is judged to be too hard to compress. - ErrIncompressible = errors.New("input is not compressible") - - // ErrUseRLE is returned from the compressor when the input is a single byte value repeated. - ErrUseRLE = errors.New("input is single value repeated") - - // ErrTooBig is return if input is too large for a single block. - ErrTooBig = errors.New("input too big") - - // ErrMaxDecodedSizeExceeded is return if input is too large for a single block. - ErrMaxDecodedSizeExceeded = errors.New("maximum output size exceeded") -) - -type ReusePolicy uint8 - -const ( - // ReusePolicyAllow will allow reuse if it produces smaller output. - ReusePolicyAllow ReusePolicy = iota - - // ReusePolicyPrefer will re-use aggressively if possible. - // This will not check if a new table will produce smaller output, - // except if the current table is impossible to use or - // compressed output is bigger than input. - ReusePolicyPrefer - - // ReusePolicyNone will disable re-use of tables. - // This is slightly faster than ReusePolicyAllow but may produce larger output. - ReusePolicyNone - - // ReusePolicyMust must allow reuse and produce smaller output. - ReusePolicyMust -) - -type Scratch struct { - count [maxSymbolValue + 1]uint32 - - // Per block parameters. - // These can be used to override compression parameters of the block. - // Do not touch, unless you know what you are doing. - - // Out is output buffer. - // If the scratch is re-used before the caller is done processing the output, - // set this field to nil. - // Otherwise the output buffer will be re-used for next Compression/Decompression step - // and allocation will be avoided. - Out []byte - - // OutTable will contain the table data only, if a new table has been generated. - // Slice of the returned data. - OutTable []byte - - // OutData will contain the compressed data. - // Slice of the returned data. - OutData []byte - - // MaxDecodedSize will set the maximum allowed output size. - // This value will automatically be set to BlockSizeMax if not set. - // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded. - MaxDecodedSize int - - srcLen int - - // MaxSymbolValue will override the maximum symbol value of the next block. - MaxSymbolValue uint8 - - // TableLog will attempt to override the tablelog for the next block. - // Must be <= 11 and >= 5. - TableLog uint8 - - // Reuse will specify the reuse policy - Reuse ReusePolicy - - // WantLogLess allows to specify a log 2 reduction that should at least be achieved, - // otherwise the block will be returned as incompressible. - // The reduction should then at least be (input size >> WantLogLess) - // If WantLogLess == 0 any improvement will do. - WantLogLess uint8 - - symbolLen uint16 // Length of active part of the symbol table. - maxCount int // count of the most probable symbol - clearCount bool // clear count - actualTableLog uint8 // Selected tablelog. - prevTableLog uint8 // Tablelog for previous table - prevTable cTable // Table used for previous compression. - cTable cTable // compression table - dt dTable // decompression table - nodes []nodeElt - tmpOut [4][]byte - fse *fse.Scratch - decPool sync.Pool // *[4][256]byte buffers. - huffWeight [maxSymbolValue + 1]byte -} - -// TransferCTable will transfer the previously used compression table. -func (s *Scratch) TransferCTable(src *Scratch) { - if cap(s.prevTable) < len(src.prevTable) { - s.prevTable = make(cTable, 0, maxSymbolValue+1) - } - s.prevTable = s.prevTable[:len(src.prevTable)] - copy(s.prevTable, src.prevTable) - s.prevTableLog = src.prevTableLog -} - -func (s *Scratch) prepare(in []byte) (*Scratch, error) { - if len(in) > BlockSizeMax { - return nil, ErrTooBig - } - if s == nil { - s = &Scratch{} - } - if s.MaxSymbolValue == 0 { - s.MaxSymbolValue = maxSymbolValue - } - if s.TableLog == 0 { - s.TableLog = tableLogDefault - } - if s.TableLog > tableLogMax || s.TableLog < minTablelog { - return nil, fmt.Errorf(" invalid tableLog %d (%d -> %d)", s.TableLog, minTablelog, tableLogMax) - } - if s.MaxDecodedSize <= 0 || s.MaxDecodedSize > BlockSizeMax { - s.MaxDecodedSize = BlockSizeMax - } - if s.clearCount && s.maxCount == 0 { - for i := range s.count { - s.count[i] = 0 - } - s.clearCount = false - } - if cap(s.Out) == 0 { - s.Out = make([]byte, 0, len(in)) - } - s.Out = s.Out[:0] - - s.OutTable = nil - s.OutData = nil - if cap(s.nodes) < huffNodesLen+1 { - s.nodes = make([]nodeElt, 0, huffNodesLen+1) - } - s.nodes = s.nodes[:0] - if s.fse == nil { - s.fse = &fse.Scratch{} - } - s.srcLen = len(in) - - return s, nil -} - -type cTable []cTableEntry - -func (c cTable) write(s *Scratch) error { - var ( - // precomputed conversion table - bitsToWeight [tableLogMax + 1]byte - huffLog = s.actualTableLog - // last weight is not saved. - maxSymbolValue = uint8(s.symbolLen - 1) - huffWeight = s.huffWeight[:256] - ) - const ( - maxFSETableLog = 6 - ) - // convert to weight - bitsToWeight[0] = 0 - for n := uint8(1); n < huffLog+1; n++ { - bitsToWeight[n] = huffLog + 1 - n - } - - // Acquire histogram for FSE. - hist := s.fse.Histogram() - hist = hist[:256] - for i := range hist[:16] { - hist[i] = 0 - } - for n := uint8(0); n < maxSymbolValue; n++ { - v := bitsToWeight[c[n].nBits] & 15 - huffWeight[n] = v - hist[v]++ - } - - // FSE compress if feasible. - if maxSymbolValue >= 2 { - huffMaxCnt := uint32(0) - huffMax := uint8(0) - for i, v := range hist[:16] { - if v == 0 { - continue - } - huffMax = byte(i) - if v > huffMaxCnt { - huffMaxCnt = v - } - } - s.fse.HistogramFinished(huffMax, int(huffMaxCnt)) - s.fse.TableLog = maxFSETableLog - b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse) - if err == nil && len(b) < int(s.symbolLen>>1) { - s.Out = append(s.Out, uint8(len(b))) - s.Out = append(s.Out, b...) - return nil - } - // Unable to compress (RLE/uncompressible) - } - // write raw values as 4-bits (max : 15) - if maxSymbolValue > (256 - 128) { - // should not happen : likely means source cannot be compressed - return ErrIncompressible - } - op := s.Out - // special case, pack weights 4 bits/weight. - op = append(op, 128|(maxSymbolValue-1)) - // be sure it doesn't cause msan issue in final combination - huffWeight[maxSymbolValue] = 0 - for n := uint16(0); n < uint16(maxSymbolValue); n += 2 { - op = append(op, (huffWeight[n]<<4)|huffWeight[n+1]) - } - s.Out = op - return nil -} - -func (c cTable) estTableSize(s *Scratch) (sz int, err error) { - var ( - // precomputed conversion table - bitsToWeight [tableLogMax + 1]byte - huffLog = s.actualTableLog - // last weight is not saved. - maxSymbolValue = uint8(s.symbolLen - 1) - huffWeight = s.huffWeight[:256] - ) - const ( - maxFSETableLog = 6 - ) - // convert to weight - bitsToWeight[0] = 0 - for n := uint8(1); n < huffLog+1; n++ { - bitsToWeight[n] = huffLog + 1 - n - } - - // Acquire histogram for FSE. - hist := s.fse.Histogram() - hist = hist[:256] - for i := range hist[:16] { - hist[i] = 0 - } - for n := uint8(0); n < maxSymbolValue; n++ { - v := bitsToWeight[c[n].nBits] & 15 - huffWeight[n] = v - hist[v]++ - } - - // FSE compress if feasible. - if maxSymbolValue >= 2 { - huffMaxCnt := uint32(0) - huffMax := uint8(0) - for i, v := range hist[:16] { - if v == 0 { - continue - } - huffMax = byte(i) - if v > huffMaxCnt { - huffMaxCnt = v - } - } - s.fse.HistogramFinished(huffMax, int(huffMaxCnt)) - s.fse.TableLog = maxFSETableLog - b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse) - if err == nil && len(b) < int(s.symbolLen>>1) { - sz += 1 + len(b) - return sz, nil - } - // Unable to compress (RLE/uncompressible) - } - // write raw values as 4-bits (max : 15) - if maxSymbolValue > (256 - 128) { - // should not happen : likely means source cannot be compressed - return 0, ErrIncompressible - } - // special case, pack weights 4 bits/weight. - sz += 1 + int(maxSymbolValue/2) - return sz, nil -} - -// estimateSize returns the estimated size in bytes of the input represented in the -// histogram supplied. -func (c cTable) estimateSize(hist []uint32) int { - nbBits := uint32(7) - for i, v := range c[:len(hist)] { - nbBits += uint32(v.nBits) * hist[i] - } - return int(nbBits >> 3) -} - -// minSize returns the minimum possible size considering the shannon limit. -func (s *Scratch) minSize(total int) int { - nbBits := float64(7) - fTotal := float64(total) - for _, v := range s.count[:s.symbolLen] { - n := float64(v) - if n > 0 { - nbBits += math.Log2(fTotal/n) * n - } - } - return int(nbBits) >> 3 -} - -func highBit32(val uint32) (n uint32) { - return uint32(bits.Len32(val) - 1) -} diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go deleted file mode 100644 index 3954c5121..000000000 --- a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go +++ /dev/null @@ -1,34 +0,0 @@ -// Package cpuinfo gives runtime info about the current CPU. -// -// This is a very limited module meant for use internally -// in this project. For more versatile solution check -// https://github.com/klauspost/cpuid. -package cpuinfo - -// HasBMI1 checks whether an x86 CPU supports the BMI1 extension. -func HasBMI1() bool { - return hasBMI1 -} - -// HasBMI2 checks whether an x86 CPU supports the BMI2 extension. -func HasBMI2() bool { - return hasBMI2 -} - -// DisableBMI2 will disable BMI2, for testing purposes. -// Call returned function to restore previous state. -func DisableBMI2() func() { - old := hasBMI2 - hasBMI2 = false - return func() { - hasBMI2 = old - } -} - -// HasBMI checks whether an x86 CPU supports both BMI1 and BMI2 extensions. -func HasBMI() bool { - return HasBMI1() && HasBMI2() -} - -var hasBMI1 bool -var hasBMI2 bool diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go deleted file mode 100644 index e802579c4..000000000 --- a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build amd64 && !appengine && !noasm && gc -// +build amd64,!appengine,!noasm,gc - -package cpuinfo - -// go:noescape -func x86extensions() (bmi1, bmi2 bool) - -func init() { - hasBMI1, hasBMI2 = x86extensions() -} diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s deleted file mode 100644 index 4465fbe9e..000000000 --- a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s +++ /dev/null @@ -1,36 +0,0 @@ -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" -#include "funcdata.h" -#include "go_asm.h" - -TEXT ·x86extensions(SB), NOSPLIT, $0 - // 1. determine max EAX value - XORQ AX, AX - CPUID - - CMPQ AX, $7 - JB unsupported - - // 2. EAX = 7, ECX = 0 --- see Table 3-8 "Information Returned by CPUID Instruction" - MOVQ $7, AX - MOVQ $0, CX - CPUID - - BTQ $3, BX // bit 3 = BMI1 - SETCS AL - - BTQ $8, BX // bit 8 = BMI2 - SETCS AH - - MOVB AL, bmi1+0(FP) - MOVB AH, bmi2+1(FP) - RET - -unsupported: - XORQ AX, AX - MOVB AL, bmi1+0(FP) - MOVB AL, bmi2+1(FP) - RET diff --git a/vendor/github.com/klauspost/compress/internal/snapref/LICENSE b/vendor/github.com/klauspost/compress/internal/snapref/LICENSE deleted file mode 100644 index 6050c10f4..000000000 --- a/vendor/github.com/klauspost/compress/internal/snapref/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/klauspost/compress/internal/snapref/decode.go b/vendor/github.com/klauspost/compress/internal/snapref/decode.go deleted file mode 100644 index 40796a49d..000000000 --- a/vendor/github.com/klauspost/compress/internal/snapref/decode.go +++ /dev/null @@ -1,264 +0,0 @@ -// Copyright 2011 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package snapref - -import ( - "encoding/binary" - "errors" - "io" -) - -var ( - // ErrCorrupt reports that the input is invalid. - ErrCorrupt = errors.New("snappy: corrupt input") - // ErrTooLarge reports that the uncompressed length is too large. - ErrTooLarge = errors.New("snappy: decoded block is too large") - // ErrUnsupported reports that the input isn't supported. - ErrUnsupported = errors.New("snappy: unsupported input") - - errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") -) - -// DecodedLen returns the length of the decoded block. -func DecodedLen(src []byte) (int, error) { - v, _, err := decodedLen(src) - return v, err -} - -// decodedLen returns the length of the decoded block and the number of bytes -// that the length header occupied. -func decodedLen(src []byte) (blockLen, headerLen int, err error) { - v, n := binary.Uvarint(src) - if n <= 0 || v > 0xffffffff { - return 0, 0, ErrCorrupt - } - - const wordSize = 32 << (^uint(0) >> 32 & 1) - if wordSize == 32 && v > 0x7fffffff { - return 0, 0, ErrTooLarge - } - return int(v), n, nil -} - -const ( - decodeErrCodeCorrupt = 1 - decodeErrCodeUnsupportedLiteralLength = 2 -) - -// Decode returns the decoded form of src. The returned slice may be a sub- -// slice of dst if dst was large enough to hold the entire decoded block. -// Otherwise, a newly allocated slice will be returned. -// -// The dst and src must not overlap. It is valid to pass a nil dst. -// -// Decode handles the Snappy block format, not the Snappy stream format. -func Decode(dst, src []byte) ([]byte, error) { - dLen, s, err := decodedLen(src) - if err != nil { - return nil, err - } - if dLen <= len(dst) { - dst = dst[:dLen] - } else { - dst = make([]byte, dLen) - } - switch decode(dst, src[s:]) { - case 0: - return dst, nil - case decodeErrCodeUnsupportedLiteralLength: - return nil, errUnsupportedLiteralLength - } - return nil, ErrCorrupt -} - -// NewReader returns a new Reader that decompresses from r, using the framing -// format described at -// https://github.com/google/snappy/blob/master/framing_format.txt -func NewReader(r io.Reader) *Reader { - return &Reader{ - r: r, - decoded: make([]byte, maxBlockSize), - buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize), - } -} - -// Reader is an io.Reader that can read Snappy-compressed bytes. -// -// Reader handles the Snappy stream format, not the Snappy block format. -type Reader struct { - r io.Reader - err error - decoded []byte - buf []byte - // decoded[i:j] contains decoded bytes that have not yet been passed on. - i, j int - readHeader bool -} - -// Reset discards any buffered data, resets all state, and switches the Snappy -// reader to read from r. This permits reusing a Reader rather than allocating -// a new one. -func (r *Reader) Reset(reader io.Reader) { - r.r = reader - r.err = nil - r.i = 0 - r.j = 0 - r.readHeader = false -} - -func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { - if _, r.err = io.ReadFull(r.r, p); r.err != nil { - if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { - r.err = ErrCorrupt - } - return false - } - return true -} - -func (r *Reader) fill() error { - for r.i >= r.j { - if !r.readFull(r.buf[:4], true) { - return r.err - } - chunkType := r.buf[0] - if !r.readHeader { - if chunkType != chunkTypeStreamIdentifier { - r.err = ErrCorrupt - return r.err - } - r.readHeader = true - } - chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 - if chunkLen > len(r.buf) { - r.err = ErrUnsupported - return r.err - } - - // The chunk types are specified at - // https://github.com/google/snappy/blob/master/framing_format.txt - switch chunkType { - case chunkTypeCompressedData: - // Section 4.2. Compressed data (chunk type 0x00). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return r.err - } - buf := r.buf[:chunkLen] - if !r.readFull(buf, false) { - return r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - buf = buf[checksumSize:] - - n, err := DecodedLen(buf) - if err != nil { - r.err = err - return r.err - } - if n > len(r.decoded) { - r.err = ErrCorrupt - return r.err - } - if _, err := Decode(r.decoded, buf); err != nil { - r.err = err - return r.err - } - if crc(r.decoded[:n]) != checksum { - r.err = ErrCorrupt - return r.err - } - r.i, r.j = 0, n - continue - - case chunkTypeUncompressedData: - // Section 4.3. Uncompressed data (chunk type 0x01). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return r.err - } - buf := r.buf[:checksumSize] - if !r.readFull(buf, false) { - return r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - // Read directly into r.decoded instead of via r.buf. - n := chunkLen - checksumSize - if n > len(r.decoded) { - r.err = ErrCorrupt - return r.err - } - if !r.readFull(r.decoded[:n], false) { - return r.err - } - if crc(r.decoded[:n]) != checksum { - r.err = ErrCorrupt - return r.err - } - r.i, r.j = 0, n - continue - - case chunkTypeStreamIdentifier: - // Section 4.1. Stream identifier (chunk type 0xff). - if chunkLen != len(magicBody) { - r.err = ErrCorrupt - return r.err - } - if !r.readFull(r.buf[:len(magicBody)], false) { - return r.err - } - for i := 0; i < len(magicBody); i++ { - if r.buf[i] != magicBody[i] { - r.err = ErrCorrupt - return r.err - } - } - continue - } - - if chunkType <= 0x7f { - // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). - r.err = ErrUnsupported - return r.err - } - // Section 4.4 Padding (chunk type 0xfe). - // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). - if !r.readFull(r.buf[:chunkLen], false) { - return r.err - } - } - - return nil -} - -// Read satisfies the io.Reader interface. -func (r *Reader) Read(p []byte) (int, error) { - if r.err != nil { - return 0, r.err - } - - if err := r.fill(); err != nil { - return 0, err - } - - n := copy(p, r.decoded[r.i:r.j]) - r.i += n - return n, nil -} - -// ReadByte satisfies the io.ByteReader interface. -func (r *Reader) ReadByte() (byte, error) { - if r.err != nil { - return 0, r.err - } - - if err := r.fill(); err != nil { - return 0, err - } - - c := r.decoded[r.i] - r.i++ - return c, nil -} diff --git a/vendor/github.com/klauspost/compress/internal/snapref/decode_other.go b/vendor/github.com/klauspost/compress/internal/snapref/decode_other.go deleted file mode 100644 index 77395a6b8..000000000 --- a/vendor/github.com/klauspost/compress/internal/snapref/decode_other.go +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package snapref - -// decode writes the decoding of src to dst. It assumes that the varint-encoded -// length of the decompressed bytes has already been read, and that len(dst) -// equals that length. -// -// It returns 0 on success or a decodeErrCodeXxx error code on failure. -func decode(dst, src []byte) int { - var d, s, offset, length int - for s < len(src) { - switch src[s] & 0x03 { - case tagLiteral: - x := uint32(src[s] >> 2) - switch { - case x < 60: - s++ - case x == 60: - s += 2 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-1]) - case x == 61: - s += 3 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-2]) | uint32(src[s-1])<<8 - case x == 62: - s += 4 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 - case x == 63: - s += 5 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 - } - length = int(x) + 1 - if length <= 0 { - return decodeErrCodeUnsupportedLiteralLength - } - if length > len(dst)-d || length > len(src)-s { - return decodeErrCodeCorrupt - } - copy(dst[d:], src[s:s+length]) - d += length - s += length - continue - - case tagCopy1: - s += 2 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - length = 4 + int(src[s-2])>>2&0x7 - offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) - - case tagCopy2: - s += 3 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - length = 1 + int(src[s-3])>>2 - offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) - - case tagCopy4: - s += 5 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - length = 1 + int(src[s-5])>>2 - offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) - } - - if offset <= 0 || d < offset || length > len(dst)-d { - return decodeErrCodeCorrupt - } - // Copy from an earlier sub-slice of dst to a later sub-slice. - // If no overlap, use the built-in copy: - if offset >= length { - copy(dst[d:d+length], dst[d-offset:]) - d += length - continue - } - - // Unlike the built-in copy function, this byte-by-byte copy always runs - // forwards, even if the slices overlap. Conceptually, this is: - // - // d += forwardCopy(dst[d:d+length], dst[d-offset:]) - // - // We align the slices into a and b and show the compiler they are the same size. - // This allows the loop to run without bounds checks. - a := dst[d : d+length] - b := dst[d-offset:] - b = b[:len(a)] - for i := range a { - a[i] = b[i] - } - d += length - } - if d != len(dst) { - return decodeErrCodeCorrupt - } - return 0 -} diff --git a/vendor/github.com/klauspost/compress/internal/snapref/encode.go b/vendor/github.com/klauspost/compress/internal/snapref/encode.go deleted file mode 100644 index 13c6040a5..000000000 --- a/vendor/github.com/klauspost/compress/internal/snapref/encode.go +++ /dev/null @@ -1,289 +0,0 @@ -// Copyright 2011 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package snapref - -import ( - "encoding/binary" - "errors" - "io" -) - -// Encode returns the encoded form of src. The returned slice may be a sub- -// slice of dst if dst was large enough to hold the entire encoded block. -// Otherwise, a newly allocated slice will be returned. -// -// The dst and src must not overlap. It is valid to pass a nil dst. -// -// Encode handles the Snappy block format, not the Snappy stream format. -func Encode(dst, src []byte) []byte { - if n := MaxEncodedLen(len(src)); n < 0 { - panic(ErrTooLarge) - } else if len(dst) < n { - dst = make([]byte, n) - } - - // The block starts with the varint-encoded length of the decompressed bytes. - d := binary.PutUvarint(dst, uint64(len(src))) - - for len(src) > 0 { - p := src - src = nil - if len(p) > maxBlockSize { - p, src = p[:maxBlockSize], p[maxBlockSize:] - } - if len(p) < minNonLiteralBlockSize { - d += emitLiteral(dst[d:], p) - } else { - d += encodeBlock(dst[d:], p) - } - } - return dst[:d] -} - -// inputMargin is the minimum number of extra input bytes to keep, inside -// encodeBlock's inner loop. On some architectures, this margin lets us -// implement a fast path for emitLiteral, where the copy of short (<= 16 byte) -// literals can be implemented as a single load to and store from a 16-byte -// register. That literal's actual length can be as short as 1 byte, so this -// can copy up to 15 bytes too much, but that's OK as subsequent iterations of -// the encoding loop will fix up the copy overrun, and this inputMargin ensures -// that we don't overrun the dst and src buffers. -const inputMargin = 16 - 1 - -// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that -// could be encoded with a copy tag. This is the minimum with respect to the -// algorithm used by encodeBlock, not a minimum enforced by the file format. -// -// The encoded output must start with at least a 1 byte literal, as there are -// no previous bytes to copy. A minimal (1 byte) copy after that, generated -// from an emitCopy call in encodeBlock's main loop, would require at least -// another inputMargin bytes, for the reason above: we want any emitLiteral -// calls inside encodeBlock's main loop to use the fast path if possible, which -// requires being able to overrun by inputMargin bytes. Thus, -// minNonLiteralBlockSize equals 1 + 1 + inputMargin. -// -// The C++ code doesn't use this exact threshold, but it could, as discussed at -// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion -// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an -// optimization. It should not affect the encoded form. This is tested by -// TestSameEncodingAsCppShortCopies. -const minNonLiteralBlockSize = 1 + 1 + inputMargin - -// MaxEncodedLen returns the maximum length of a snappy block, given its -// uncompressed length. -// -// It will return a negative value if srcLen is too large to encode. -func MaxEncodedLen(srcLen int) int { - n := uint64(srcLen) - if n > 0xffffffff { - return -1 - } - // Compressed data can be defined as: - // compressed := item* literal* - // item := literal* copy - // - // The trailing literal sequence has a space blowup of at most 62/60 - // since a literal of length 60 needs one tag byte + one extra byte - // for length information. - // - // Item blowup is trickier to measure. Suppose the "copy" op copies - // 4 bytes of data. Because of a special check in the encoding code, - // we produce a 4-byte copy only if the offset is < 65536. Therefore - // the copy op takes 3 bytes to encode, and this type of item leads - // to at most the 62/60 blowup for representing literals. - // - // Suppose the "copy" op copies 5 bytes of data. If the offset is big - // enough, it will take 5 bytes to encode the copy op. Therefore the - // worst case here is a one-byte literal followed by a five-byte copy. - // That is, 6 bytes of input turn into 7 bytes of "compressed" data. - // - // This last factor dominates the blowup, so the final estimate is: - n = 32 + n + n/6 - if n > 0xffffffff { - return -1 - } - return int(n) -} - -var errClosed = errors.New("snappy: Writer is closed") - -// NewWriter returns a new Writer that compresses to w. -// -// The Writer returned does not buffer writes. There is no need to Flush or -// Close such a Writer. -// -// Deprecated: the Writer returned is not suitable for many small writes, only -// for few large writes. Use NewBufferedWriter instead, which is efficient -// regardless of the frequency and shape of the writes, and remember to Close -// that Writer when done. -func NewWriter(w io.Writer) *Writer { - return &Writer{ - w: w, - obuf: make([]byte, obufLen), - } -} - -// NewBufferedWriter returns a new Writer that compresses to w, using the -// framing format described at -// https://github.com/google/snappy/blob/master/framing_format.txt -// -// The Writer returned buffers writes. Users must call Close to guarantee all -// data has been forwarded to the underlying io.Writer. They may also call -// Flush zero or more times before calling Close. -func NewBufferedWriter(w io.Writer) *Writer { - return &Writer{ - w: w, - ibuf: make([]byte, 0, maxBlockSize), - obuf: make([]byte, obufLen), - } -} - -// Writer is an io.Writer that can write Snappy-compressed bytes. -// -// Writer handles the Snappy stream format, not the Snappy block format. -type Writer struct { - w io.Writer - err error - - // ibuf is a buffer for the incoming (uncompressed) bytes. - // - // Its use is optional. For backwards compatibility, Writers created by the - // NewWriter function have ibuf == nil, do not buffer incoming bytes, and - // therefore do not need to be Flush'ed or Close'd. - ibuf []byte - - // obuf is a buffer for the outgoing (compressed) bytes. - obuf []byte - - // wroteStreamHeader is whether we have written the stream header. - wroteStreamHeader bool -} - -// Reset discards the writer's state and switches the Snappy writer to write to -// w. This permits reusing a Writer rather than allocating a new one. -func (w *Writer) Reset(writer io.Writer) { - w.w = writer - w.err = nil - if w.ibuf != nil { - w.ibuf = w.ibuf[:0] - } - w.wroteStreamHeader = false -} - -// Write satisfies the io.Writer interface. -func (w *Writer) Write(p []byte) (nRet int, errRet error) { - if w.ibuf == nil { - // Do not buffer incoming bytes. This does not perform or compress well - // if the caller of Writer.Write writes many small slices. This - // behavior is therefore deprecated, but still supported for backwards - // compatibility with code that doesn't explicitly Flush or Close. - return w.write(p) - } - - // The remainder of this method is based on bufio.Writer.Write from the - // standard library. - - for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil { - var n int - if len(w.ibuf) == 0 { - // Large write, empty buffer. - // Write directly from p to avoid copy. - n, _ = w.write(p) - } else { - n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) - w.ibuf = w.ibuf[:len(w.ibuf)+n] - w.Flush() - } - nRet += n - p = p[n:] - } - if w.err != nil { - return nRet, w.err - } - n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) - w.ibuf = w.ibuf[:len(w.ibuf)+n] - nRet += n - return nRet, nil -} - -func (w *Writer) write(p []byte) (nRet int, errRet error) { - if w.err != nil { - return 0, w.err - } - for len(p) > 0 { - obufStart := len(magicChunk) - if !w.wroteStreamHeader { - w.wroteStreamHeader = true - copy(w.obuf, magicChunk) - obufStart = 0 - } - - var uncompressed []byte - if len(p) > maxBlockSize { - uncompressed, p = p[:maxBlockSize], p[maxBlockSize:] - } else { - uncompressed, p = p, nil - } - checksum := crc(uncompressed) - - // Compress the buffer, discarding the result if the improvement - // isn't at least 12.5%. - compressed := Encode(w.obuf[obufHeaderLen:], uncompressed) - chunkType := uint8(chunkTypeCompressedData) - chunkLen := 4 + len(compressed) - obufEnd := obufHeaderLen + len(compressed) - if len(compressed) >= len(uncompressed)-len(uncompressed)/8 { - chunkType = chunkTypeUncompressedData - chunkLen = 4 + len(uncompressed) - obufEnd = obufHeaderLen - } - - // Fill in the per-chunk header that comes before the body. - w.obuf[len(magicChunk)+0] = chunkType - w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0) - w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8) - w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16) - w.obuf[len(magicChunk)+4] = uint8(checksum >> 0) - w.obuf[len(magicChunk)+5] = uint8(checksum >> 8) - w.obuf[len(magicChunk)+6] = uint8(checksum >> 16) - w.obuf[len(magicChunk)+7] = uint8(checksum >> 24) - - if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil { - w.err = err - return nRet, err - } - if chunkType == chunkTypeUncompressedData { - if _, err := w.w.Write(uncompressed); err != nil { - w.err = err - return nRet, err - } - } - nRet += len(uncompressed) - } - return nRet, nil -} - -// Flush flushes the Writer to its underlying io.Writer. -func (w *Writer) Flush() error { - if w.err != nil { - return w.err - } - if len(w.ibuf) == 0 { - return nil - } - w.write(w.ibuf) - w.ibuf = w.ibuf[:0] - return w.err -} - -// Close calls Flush and then closes the Writer. -func (w *Writer) Close() error { - w.Flush() - ret := w.err - if w.err == nil { - w.err = errClosed - } - return ret -} diff --git a/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go deleted file mode 100644 index 2754bac6f..000000000 --- a/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go +++ /dev/null @@ -1,250 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package snapref - -func load32(b []byte, i int) uint32 { - b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 -} - -func load64(b []byte, i int) uint64 { - b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 -} - -// emitLiteral writes a literal chunk and returns the number of bytes written. -// -// It assumes that: -// -// dst is long enough to hold the encoded bytes -// 1 <= len(lit) && len(lit) <= 65536 -func emitLiteral(dst, lit []byte) int { - i, n := 0, uint(len(lit)-1) - switch { - case n < 60: - dst[0] = uint8(n)<<2 | tagLiteral - i = 1 - case n < 1<<8: - dst[0] = 60<<2 | tagLiteral - dst[1] = uint8(n) - i = 2 - default: - dst[0] = 61<<2 | tagLiteral - dst[1] = uint8(n) - dst[2] = uint8(n >> 8) - i = 3 - } - return i + copy(dst[i:], lit) -} - -// emitCopy writes a copy chunk and returns the number of bytes written. -// -// It assumes that: -// -// dst is long enough to hold the encoded bytes -// 1 <= offset && offset <= 65535 -// 4 <= length && length <= 65535 -func emitCopy(dst []byte, offset, length int) int { - i := 0 - // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The - // threshold for this loop is a little higher (at 68 = 64 + 4), and the - // length emitted down below is a little lower (at 60 = 64 - 4), because - // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed - // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as - // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as - // 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a - // tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an - // encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1. - for length >= 68 { - // Emit a length 64 copy, encoded as 3 bytes. - dst[i+0] = 63<<2 | tagCopy2 - dst[i+1] = uint8(offset) - dst[i+2] = uint8(offset >> 8) - i += 3 - length -= 64 - } - if length > 64 { - // Emit a length 60 copy, encoded as 3 bytes. - dst[i+0] = 59<<2 | tagCopy2 - dst[i+1] = uint8(offset) - dst[i+2] = uint8(offset >> 8) - i += 3 - length -= 60 - } - if length >= 12 || offset >= 2048 { - // Emit the remaining copy, encoded as 3 bytes. - dst[i+0] = uint8(length-1)<<2 | tagCopy2 - dst[i+1] = uint8(offset) - dst[i+2] = uint8(offset >> 8) - return i + 3 - } - // Emit the remaining copy, encoded as 2 bytes. - dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 - dst[i+1] = uint8(offset) - return i + 2 -} - -func hash(u, shift uint32) uint32 { - return (u * 0x1e35a7bd) >> shift -} - -// EncodeBlockInto exposes encodeBlock but checks dst size. -func EncodeBlockInto(dst, src []byte) (d int) { - if MaxEncodedLen(len(src)) > len(dst) { - return 0 - } - - // encodeBlock breaks on too big blocks, so split. - for len(src) > 0 { - p := src - src = nil - if len(p) > maxBlockSize { - p, src = p[:maxBlockSize], p[maxBlockSize:] - } - if len(p) < minNonLiteralBlockSize { - d += emitLiteral(dst[d:], p) - } else { - d += encodeBlock(dst[d:], p) - } - } - return d -} - -// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It -// assumes that the varint-encoded length of the decompressed bytes has already -// been written. -// -// It also assumes that: -// -// len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize -func encodeBlock(dst, src []byte) (d int) { - // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. - // The table element type is uint16, as s < sLimit and sLimit < len(src) - // and len(src) <= maxBlockSize and maxBlockSize == 65536. - const ( - maxTableSize = 1 << 14 - // tableMask is redundant, but helps the compiler eliminate bounds - // checks. - tableMask = maxTableSize - 1 - ) - shift := uint32(32 - 8) - for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { - shift-- - } - // In Go, all array elements are zero-initialized, so there is no advantage - // to a smaller tableSize per se. However, it matches the C++ algorithm, - // and in the asm versions of this code, we can get away with zeroing only - // the first tableSize elements. - var table [maxTableSize]uint16 - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := len(src) - inputMargin - - // nextEmit is where in src the next emitLiteral should start from. - nextEmit := 0 - - // The encoded form must start with a literal, as there are no previous - // bytes to copy, so we start looking for hash matches at s == 1. - s := 1 - nextHash := hash(load32(src, s), shift) - - for { - // Copied from the C++ snappy implementation: - // - // Heuristic match skipping: If 32 bytes are scanned with no matches - // found, start looking only at every other byte. If 32 more bytes are - // scanned (or skipped), look at every third byte, etc.. When a match - // is found, immediately go back to looking at every byte. This is a - // small loss (~5% performance, ~0.1% density) for compressible data - // due to more bookkeeping, but for non-compressible data (such as - // JPEG) it's a huge win since the compressor quickly "realizes" the - // data is incompressible and doesn't bother looking for matches - // everywhere. - // - // The "skip" variable keeps track of how many bytes there are since - // the last match; dividing it by 32 (ie. right-shifting by five) gives - // the number of bytes to move ahead for each iteration. - skip := 32 - - nextS := s - candidate := 0 - for { - s = nextS - bytesBetweenHashLookups := skip >> 5 - nextS = s + bytesBetweenHashLookups - skip += bytesBetweenHashLookups - if nextS > sLimit { - goto emitRemainder - } - candidate = int(table[nextHash&tableMask]) - table[nextHash&tableMask] = uint16(s) - nextHash = hash(load32(src, nextS), shift) - if load32(src, s) == load32(src, candidate) { - break - } - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - d += emitLiteral(dst[d:], src[nextEmit:s]) - - // Call emitCopy, and then see if another emitCopy could be our next - // move. Repeat until we find no match for the input immediately after - // what was consumed by the last emitCopy call. - // - // If we exit this loop normally then we need to call emitLiteral next, - // though we don't yet know how big the literal will be. We handle that - // by proceeding to the next iteration of the main loop. We also can - // exit this loop via goto if we get close to exhausting the input. - for { - // Invariant: we have a 4-byte match at s, and no need to emit any - // literal bytes prior to s. - base := s - - // Extend the 4-byte match as long as possible. - // - // This is an inlined version of: - // s = extendMatch(src, candidate+4, s+4) - s += 4 - for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 { - } - - d += emitCopy(dst[d:], base-candidate, s-base) - nextEmit = s - if s >= sLimit { - goto emitRemainder - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-1 and at s. If - // another emitCopy is not our next move, also calculate nextHash - // at s+1. At least on GOARCH=amd64, these three hash calculations - // are faster as one load64 call (with some shifts) instead of - // three load32 calls. - x := load64(src, s-1) - prevHash := hash(uint32(x>>0), shift) - table[prevHash&tableMask] = uint16(s - 1) - currHash := hash(uint32(x>>8), shift) - candidate = int(table[currHash&tableMask]) - table[currHash&tableMask] = uint16(s) - if uint32(x>>8) != load32(src, candidate) { - nextHash = hash(uint32(x>>16), shift) - s++ - break - } - } - } - -emitRemainder: - if nextEmit < len(src) { - d += emitLiteral(dst[d:], src[nextEmit:]) - } - return d -} diff --git a/vendor/github.com/klauspost/compress/internal/snapref/snappy.go b/vendor/github.com/klauspost/compress/internal/snapref/snappy.go deleted file mode 100644 index 34d01f4aa..000000000 --- a/vendor/github.com/klauspost/compress/internal/snapref/snappy.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2011 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package snapref implements the Snappy compression format. It aims for very -// high speeds and reasonable compression. -// -// There are actually two Snappy formats: block and stream. They are related, -// but different: trying to decompress block-compressed data as a Snappy stream -// will fail, and vice versa. The block format is the Decode and Encode -// functions and the stream format is the Reader and Writer types. -// -// The block format, the more common case, is used when the complete size (the -// number of bytes) of the original data is known upfront, at the time -// compression starts. The stream format, also known as the framing format, is -// for when that isn't always true. -// -// The canonical, C++ implementation is at https://github.com/google/snappy and -// it only implements the block format. -package snapref - -import ( - "hash/crc32" -) - -/* -Each encoded block begins with the varint-encoded length of the decoded data, -followed by a sequence of chunks. Chunks begin and end on byte boundaries. The -first byte of each chunk is broken into its 2 least and 6 most significant bits -called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. -Zero means a literal tag. All other values mean a copy tag. - -For literal tags: - - If m < 60, the next 1 + m bytes are literal bytes. - - Otherwise, let n be the little-endian unsigned integer denoted by the next - m - 59 bytes. The next 1 + n bytes after that are literal bytes. - -For copy tags, length bytes are copied from offset bytes ago, in the style of -Lempel-Ziv compression algorithms. In particular: - - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). - The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 - of the offset. The next byte is bits 0-7 of the offset. - - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). - The length is 1 + m. The offset is the little-endian unsigned integer - denoted by the next 2 bytes. - - For l == 3, this tag is a legacy format that is no longer issued by most - encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in - [1, 65). The length is 1 + m. The offset is the little-endian unsigned - integer denoted by the next 4 bytes. -*/ -const ( - tagLiteral = 0x00 - tagCopy1 = 0x01 - tagCopy2 = 0x02 - tagCopy4 = 0x03 -) - -const ( - checksumSize = 4 - chunkHeaderSize = 4 - magicChunk = "\xff\x06\x00\x00" + magicBody - magicBody = "sNaPpY" - - // maxBlockSize is the maximum size of the input to encodeBlock. It is not - // part of the wire format per se, but some parts of the encoder assume - // that an offset fits into a uint16. - // - // Also, for the framing format (Writer type instead of Encode function), - // https://github.com/google/snappy/blob/master/framing_format.txt says - // that "the uncompressed data in a chunk must be no longer than 65536 - // bytes". - maxBlockSize = 65536 - - // maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is - // hard coded to be a const instead of a variable, so that obufLen can also - // be a const. Their equivalence is confirmed by - // TestMaxEncodedLenOfMaxBlockSize. - maxEncodedLenOfMaxBlockSize = 76490 - - obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize - obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize -) - -const ( - chunkTypeCompressedData = 0x00 - chunkTypeUncompressedData = 0x01 - chunkTypePadding = 0xfe - chunkTypeStreamIdentifier = 0xff -) - -var crcTable = crc32.MakeTable(crc32.Castagnoli) - -// crc implements the checksum specified in section 3 of -// https://github.com/google/snappy/blob/master/framing_format.txt -func crc(b []byte) uint32 { - c := crc32.Update(0, crcTable, b) - return uint32(c>>15|c<<17) + 0xa282ead8 -} diff --git a/vendor/github.com/klauspost/compress/s2sx.mod b/vendor/github.com/klauspost/compress/s2sx.mod deleted file mode 100644 index 81bda5e29..000000000 --- a/vendor/github.com/klauspost/compress/s2sx.mod +++ /dev/null @@ -1,3 +0,0 @@ -module github.com/klauspost/compress - -go 1.22 diff --git a/vendor/github.com/klauspost/compress/s2sx.sum b/vendor/github.com/klauspost/compress/s2sx.sum deleted file mode 100644 index e69de29bb..000000000 diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md deleted file mode 100644 index c11d7fa28..000000000 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ /dev/null @@ -1,441 +0,0 @@ -# zstd - -[Zstandard](https://facebook.github.io/zstd/) is a real-time compression algorithm, providing high compression ratios. -It offers a very wide range of compression / speed trade-off, while being backed by a very fast decoder. -A high performance compression algorithm is implemented. For now focused on speed. - -This package provides [compression](#Compressor) to and [decompression](#Decompressor) of Zstandard content. - -This package is pure Go. Use `noasm` and `nounsafe` to disable relevant features. - -The `zstd` package is provided as open source software using a Go standard license. - -Currently the package is heavily optimized for 64 bit processors and will be significantly slower on 32 bit processors. - -For seekable zstd streams, see [this excellent package](https://github.com/SaveTheRbtz/zstd-seekable-format-go). - -## Installation - -Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`. - -[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/compress/zstd.svg)](https://pkg.go.dev/github.com/klauspost/compress/zstd) - -## Compressor - -### Status: - -STABLE - there may always be subtle bugs, a wide variety of content has been tested and the library is actively -used by several projects. This library is being [fuzz-tested](https://github.com/klauspost/compress-fuzz) for all updates. - -There may still be specific combinations of data types/size/settings that could lead to edge cases, -so as always, testing is recommended. - -For now, a high speed (fastest) and medium-fast (default) compressor has been implemented. - -* The "Fastest" compression ratio is roughly equivalent to zstd level 1. -* The "Default" compression ratio is roughly equivalent to zstd level 3 (default). -* The "Better" compression ratio is roughly equivalent to zstd level 7. -* The "Best" compression ratio is roughly equivalent to zstd level 11. - -In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. -The compression ratio compared to stdlib is around level 3, but usually 3x as fast. - - -### Usage - -An Encoder can be used for either compressing a stream via the -`io.WriteCloser` interface supported by the Encoder or as multiple independent -tasks via the `EncodeAll` function. -Smaller encodes are encouraged to use the EncodeAll function. -Use `NewWriter` to create a new instance that can be used for both. - -To create a writer with default options, do like this: - -```Go -// Compress input to output. -func Compress(in io.Reader, out io.Writer) error { - enc, err := zstd.NewWriter(out) - if err != nil { - return err - } - _, err = io.Copy(enc, in) - if err != nil { - enc.Close() - return err - } - return enc.Close() -} -``` - -Now you can encode by writing data to `enc`. The output will be finished writing when `Close()` is called. -Even if your encode fails, you should still call `Close()` to release any resources that may be held up. - -The above is fine for big encodes. However, whenever possible try to *reuse* the writer. - -To reuse the encoder, you can use the `Reset(io.Writer)` function to change to another output. -This will allow the encoder to reuse all resources and avoid wasteful allocations. - -Currently stream encoding has 'light' concurrency, meaning up to 2 goroutines can be working on part -of a stream. This is independent of the `WithEncoderConcurrency(n)`, but that is likely to change -in the future. So if you want to limit concurrency for future updates, specify the concurrency -you would like. - -If you would like stream encoding to be done without spawning async goroutines, use `WithEncoderConcurrency(1)` -which will compress input as each block is completed, blocking on writes until each has completed. - -You can specify your desired compression level using `WithEncoderLevel()` option. Currently only pre-defined -compression settings can be specified. - -#### Future Compatibility Guarantees - -This will be an evolving project. When using this package it is important to note that both the compression efficiency and speed may change. - -The goal will be to keep the default efficiency at the default zstd (level 3). -However the encoding should never be assumed to remain the same, -and you should not use hashes of compressed output for similarity checks. - -The Encoder can be assumed to produce the same output from the exact same code version. -However, the may be modes in the future that break this, -although they will not be enabled without an explicit option. - -This encoder is not designed to (and will probably never) output the exact same bitstream as the reference encoder. - -Also note, that the cgo decompressor currently does not [report all errors on invalid input](https://github.com/DataDog/zstd/issues/59), -[omits error checks](https://github.com/DataDog/zstd/issues/61), [ignores checksums](https://github.com/DataDog/zstd/issues/43) -and seems to ignore concatenated streams, even though [it is part of the spec](https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frames). - -#### Blocks - -For compressing small blocks, the returned encoder has a function called `EncodeAll(src, dst []byte) []byte`. - -`EncodeAll` will encode all input in src and append it to dst. -This function can be called concurrently. -Each call will only run on a same goroutine as the caller. - -Encoded blocks can be concatenated and the result will be the combined input stream. -Data compressed with EncodeAll can be decoded with the Decoder, using either a stream or `DecodeAll`. - -Especially when encoding blocks you should take special care to reuse the encoder. -This will effectively make it run without allocations after a warmup period. -To make it run completely without allocations, supply a destination buffer with space for all content. - -```Go -import "github.com/klauspost/compress/zstd" - -// Create a writer that caches compressors. -// For this operation type we supply a nil Reader. -var encoder, _ = zstd.NewWriter(nil) - -// Compress a buffer. -// If you have a destination buffer, the allocation in the call can also be eliminated. -func Compress(src []byte) []byte { - return encoder.EncodeAll(src, make([]byte, 0, len(src))) -} -``` - -You can control the maximum number of concurrent encodes using the `WithEncoderConcurrency(n)` -option when creating the writer. - -Using the Encoder for both a stream and individual blocks concurrently is safe. - -### Performance - -I have collected some speed examples to compare speed and compression against other compressors. - -* `file` is the input file. -* `out` is the compressor used. `zskp` is this package. `zstd` is the Datadog cgo library. `gzstd/gzkp` is gzip standard and this library. -* `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default"; 3 is "better", 4 is "best". -* `insize`/`outsize` is the input/output size. -* `millis` is the number of milliseconds used for compression. -* `mb/s` is megabytes (2^20 bytes) per second. - -``` -Silesia Corpus: -http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip - -This package: -file out level insize outsize millis mb/s -silesia.tar zskp 1 211947520 73821326 634 318.47 -silesia.tar zskp 2 211947520 67655404 1508 133.96 -silesia.tar zskp 3 211947520 64746933 3000 67.37 -silesia.tar zskp 4 211947520 60073508 16926 11.94 - -cgo zstd: -silesia.tar zstd 1 211947520 73605392 543 371.56 -silesia.tar zstd 3 211947520 66793289 864 233.68 -silesia.tar zstd 6 211947520 62916450 1913 105.66 -silesia.tar zstd 9 211947520 60212393 5063 39.92 - -gzip, stdlib/this package: -silesia.tar gzstd 1 211947520 80007735 1498 134.87 -silesia.tar gzkp 1 211947520 80088272 1009 200.31 - -GOB stream of binary data. Highly compressible. -https://files.klauspost.com/compress/gob-stream.7z - -file out level insize outsize millis mb/s -gob-stream zskp 1 1911399616 233948096 3230 564.34 -gob-stream zskp 2 1911399616 203997694 4997 364.73 -gob-stream zskp 3 1911399616 173526523 13435 135.68 -gob-stream zskp 4 1911399616 162195235 47559 38.33 - -gob-stream zstd 1 1911399616 249810424 2637 691.26 -gob-stream zstd 3 1911399616 208192146 3490 522.31 -gob-stream zstd 6 1911399616 193632038 6687 272.56 -gob-stream zstd 9 1911399616 177620386 16175 112.70 - -gob-stream gzstd 1 1911399616 357382013 9046 201.49 -gob-stream gzkp 1 1911399616 359136669 4885 373.08 - -The test data for the Large Text Compression Benchmark is the first -10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. -http://mattmahoney.net/dc/textdata.html - -file out level insize outsize millis mb/s -enwik9 zskp 1 1000000000 343833605 3687 258.64 -enwik9 zskp 2 1000000000 317001237 7672 124.29 -enwik9 zskp 3 1000000000 291915823 15923 59.89 -enwik9 zskp 4 1000000000 261710291 77697 12.27 - -enwik9 zstd 1 1000000000 358072021 3110 306.65 -enwik9 zstd 3 1000000000 313734672 4784 199.35 -enwik9 zstd 6 1000000000 295138875 10290 92.68 -enwik9 zstd 9 1000000000 278348700 28549 33.40 - -enwik9 gzstd 1 1000000000 382578136 8608 110.78 -enwik9 gzkp 1 1000000000 382781160 5628 169.45 - -Highly compressible JSON file. -https://files.klauspost.com/compress/github-june-2days-2019.json.zst - -file out level insize outsize millis mb/s -github-june-2days-2019.json zskp 1 6273951764 697439532 9789 611.17 -github-june-2days-2019.json zskp 2 6273951764 610876538 18553 322.49 -github-june-2days-2019.json zskp 3 6273951764 517662858 44186 135.41 -github-june-2days-2019.json zskp 4 6273951764 464617114 165373 36.18 - -github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00 -github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57 -github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18 -github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16 - -github-june-2days-2019.json gzstd 1 6273951764 1164397768 26793 223.32 -github-june-2days-2019.json gzkp 1 6273951764 1120631856 17693 338.16 - -VM Image, Linux mint with a few installed applications: -https://files.klauspost.com/compress/rawstudio-mint14.7z - -file out level insize outsize millis mb/s -rawstudio-mint14.tar zskp 1 8558382592 3718400221 18206 448.29 -rawstudio-mint14.tar zskp 2 8558382592 3326118337 37074 220.15 -rawstudio-mint14.tar zskp 3 8558382592 3163842361 87306 93.49 -rawstudio-mint14.tar zskp 4 8558382592 2970480650 783862 10.41 - -rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27 -rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92 -rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77 -rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91 - -rawstudio-mint14.tar gzstd 1 8558382592 3926234992 51345 158.96 -rawstudio-mint14.tar gzkp 1 8558382592 3960117298 36722 222.26 - -CSV data: -https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst - -file out level insize outsize millis mb/s -nyc-taxi-data-10M.csv zskp 1 3325605752 641319332 9462 335.17 -nyc-taxi-data-10M.csv zskp 2 3325605752 588976126 17570 180.50 -nyc-taxi-data-10M.csv zskp 3 3325605752 529329260 32432 97.79 -nyc-taxi-data-10M.csv zskp 4 3325605752 474949772 138025 22.98 - -nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18 -nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07 -nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27 -nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12 - -nyc-taxi-data-10M.csv gzstd 1 3325605752 928654908 21270 149.11 -nyc-taxi-data-10M.csv gzkp 1 3325605752 922273214 13929 227.68 -``` - -## Decompressor - -Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested. - -This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz), -kindly supplied by [fuzzit.dev](https://fuzzit.dev/). -The main purpose of the fuzz testing is to ensure that it is not possible to crash the decoder, -or run it past its limits with ANY input provided. - -### Usage - -The package has been designed for two main usages, big streams of data and smaller in-memory buffers. -There are two main usages of the package for these. Both of them are accessed by creating a `Decoder`. - -For streaming use a simple setup could look like this: - -```Go -import "github.com/klauspost/compress/zstd" - -func Decompress(in io.Reader, out io.Writer) error { - d, err := zstd.NewReader(in) - if err != nil { - return err - } - defer d.Close() - - // Copy content... - _, err = io.Copy(out, d) - return err -} -``` - -It is important to use the "Close" function when you no longer need the Reader to stop running goroutines, -when running with default settings. -Goroutines will exit once an error has been returned, including `io.EOF` at the end of a stream. - -Streams are decoded concurrently in 4 asynchronous stages to give the best possible throughput. -However, if you prefer synchronous decompression, use `WithDecoderConcurrency(1)` which will decompress data -as it is being requested only. - -For decoding buffers, it could look something like this: - -```Go -import "github.com/klauspost/compress/zstd" - -// Create a reader that caches decompressors. -// For this operation type we supply a nil Reader. -var decoder, _ = zstd.NewReader(nil, zstd.WithDecoderConcurrency(0)) - -// Decompress a buffer. We don't supply a destination buffer, -// so it will be allocated by the decoder. -func Decompress(src []byte) ([]byte, error) { - return decoder.DecodeAll(src, nil) -} -``` - -Both of these cases should provide the functionality needed. -The decoder can be used for *concurrent* decompression of multiple buffers. -By default 4 decompressors will be created. - -It will only allow a certain number of concurrent operations to run. -To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder. -It is possible to use `WithDecoderConcurrency(0)` to create GOMAXPROCS decoders. - -### Dictionaries - -Data compressed with [dictionaries](https://github.com/facebook/zstd#the-case-for-small-data-compression) can be decompressed. - -Dictionaries are added individually to Decoders. -Dictionaries are generated by the `zstd --train` command and contains an initial state for the decoder. -To add a dictionary use the `WithDecoderDicts(dicts ...[]byte)` option with the dictionary data. -Several dictionaries can be added at once. - -The dictionary will be used automatically for the data that specifies them. -A re-used Decoder will still contain the dictionaries registered. - -When registering multiple dictionaries with the same ID, the last one will be used. - -It is possible to use dictionaries when compressing data. - -To enable a dictionary use `WithEncoderDict(dict []byte)`. Here only one dictionary will be used -and it will likely be used even if it doesn't improve compression. - -The used dictionary must be used to decompress the content. - -For any real gains, the dictionary should be built with similar data. -If an unsuitable dictionary is used the output may be slightly larger than using no dictionary. -Use the [zstd commandline tool](https://github.com/facebook/zstd/releases) to build a dictionary from sample data. -For information see [zstd dictionary information](https://github.com/facebook/zstd#the-case-for-small-data-compression). - -For now there is a fixed startup performance penalty for compressing content with dictionaries. -This will likely be improved over time. Just be aware to test performance when implementing. - -### Allocation-less operation - -The decoder has been designed to operate without allocations after a warmup. - -This means that you should *store* the decoder for best performance. -To re-use a stream decoder, use the `Reset(r io.Reader) error` to switch to another stream. -A decoder can safely be re-used even if the previous stream failed. - -To release the resources, you must call the `Close()` function on a decoder. -After this it can *no longer be reused*, but all running goroutines will be stopped. -So you *must* use this if you will no longer need the Reader. - -For decompressing smaller buffers a single decoder can be used. -When decoding buffers, you can supply a destination slice with length 0 and your expected capacity. -In this case no unneeded allocations should be made. - -### Concurrency - -The buffer decoder does everything on the same goroutine and does nothing concurrently. -It can however decode several buffers concurrently. Use `WithDecoderConcurrency(n)` to limit that. - -The stream decoder will create goroutines that: - -1) Reads input and splits the input into blocks. -2) Decompression of literals. -3) Decompression of sequences. -4) Reconstruction of output stream. - -So effectively this also means the decoder will "read ahead" and prepare data to always be available for output. - -The concurrency level will, for streams, determine how many blocks ahead the compression will start. - -Since "blocks" are quite dependent on the output of the previous block stream decoding will only have limited concurrency. - -In practice this means that concurrency is often limited to utilizing about 3 cores effectively. - -### Benchmarks - -The first two are streaming decodes and the last are smaller inputs. - -Running on AMD Ryzen 9 3950X 16-Core Processor. AMD64 assembly used. - -``` -BenchmarkDecoderSilesia-32 5 206878840 ns/op 1024.50 MB/s 49808 B/op 43 allocs/op -BenchmarkDecoderEnwik9-32 1 1271809000 ns/op 786.28 MB/s 72048 B/op 52 allocs/op - -Concurrent blocks, performance: - -BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32 67356 17857 ns/op 10321.96 MB/s 22.48 pct 102 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32 266656 4421 ns/op 26823.21 MB/s 11.89 pct 19 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32 20992 56842 ns/op 8477.17 MB/s 39.90 pct 754 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32 27456 43932 ns/op 9714.01 MB/s 33.27 pct 524 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32 78432 15047 ns/op 8319.15 MB/s 40.34 pct 66 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32 65800 18436 ns/op 8249.63 MB/s 37.75 pct 88 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32 102993 11523 ns/op 35546.09 MB/s 3.637 pct 143 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32 1000000 1070 ns/op 95720.98 MB/s 80.53 pct 3 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32 749802 1752 ns/op 70272.35 MB/s 100.0 pct 5 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32 22640 52934 ns/op 13263.37 MB/s 26.25 pct 1014 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/html.zst-32 226412 5232 ns/op 19572.27 MB/s 14.49 pct 20 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32 923041 1276 ns/op 3194.71 MB/s 31.26 pct 0 B/op 0 allocs/op -``` - -This reflects the performance around May 2022, but this may be out of date. - -## Zstd inside ZIP files - -It is possible to use zstandard to compress individual files inside zip archives. -While this isn't widely supported it can be useful for internal files. - -To support the compression and decompression of these files you must register a compressor and decompressor. - -It is highly recommended registering the (de)compressors on individual zip Reader/Writer and NOT -use the global registration functions. The main reason for this is that 2 registrations from -different packages will result in a panic. - -It is a good idea to only have a single compressor and decompressor, since they can be used for multiple zip -files concurrently, and using a single instance will allow reusing some resources. - -See [this example](https://pkg.go.dev/github.com/klauspost/compress/zstd#example-ZipCompressor) for -how to compress and decompress files inside zip archives. - -# Contributions - -Contributions are always welcome. -For new features/fixes, remember to add tests and for performance enhancements include benchmarks. - -For general feedback and experience reports, feel free to open an issue or write me on [Twitter](https://twitter.com/sh0dan). - -This package includes the excellent [`github.com/cespare/xxhash`](https://github.com/cespare/xxhash) package Copyright (c) 2016 Caleb Spare. diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go deleted file mode 100644 index d41e3e170..000000000 --- a/vendor/github.com/klauspost/compress/zstd/bitreader.go +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "errors" - "fmt" - "io" - "math/bits" - - "github.com/klauspost/compress/internal/le" -) - -// bitReader reads a bitstream in reverse. -// The last set bit indicates the start of the stream and is used -// for aligning the input. -type bitReader struct { - in []byte - value uint64 // Maybe use [16]byte, but shifting is awkward. - cursor int // offset where next read should end - bitsRead uint8 -} - -// init initializes and resets the bit reader. -func (b *bitReader) init(in []byte) error { - if len(in) < 1 { - return errors.New("corrupt stream: too short") - } - b.in = in - // The highest bit of the last byte indicates where to start - v := in[len(in)-1] - if v == 0 { - return errors.New("corrupt stream, did not find end of stream") - } - b.cursor = len(in) - b.bitsRead = 64 - b.value = 0 - if len(in) >= 8 { - b.fillFastStart() - } else { - b.fill() - b.fill() - } - b.bitsRead += 8 - uint8(highBits(uint32(v))) - return nil -} - -// getBits will return n bits. n can be 0. -func (b *bitReader) getBits(n uint8) int { - if n == 0 /*|| b.bitsRead >= 64 */ { - return 0 - } - return int(b.get32BitsFast(n)) -} - -// get32BitsFast requires that at least one bit is requested every time. -// There are no checks if the buffer is filled. -func (b *bitReader) get32BitsFast(n uint8) uint32 { - const regMask = 64 - 1 - v := uint32((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) - b.bitsRead += n - return v -} - -// fillFast() will make sure at least 32 bits are available. -// There must be at least 4 bytes available. -func (b *bitReader) fillFast() { - if b.bitsRead < 32 { - return - } - b.cursor -= 4 - b.value = (b.value << 32) | uint64(le.Load32(b.in, b.cursor)) - b.bitsRead -= 32 -} - -// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. -func (b *bitReader) fillFastStart() { - b.cursor -= 8 - b.value = le.Load64(b.in, b.cursor) - b.bitsRead = 0 -} - -// fill() will make sure at least 32 bits are available. -func (b *bitReader) fill() { - if b.bitsRead < 32 { - return - } - if b.cursor >= 4 { - b.cursor -= 4 - b.value = (b.value << 32) | uint64(le.Load32(b.in, b.cursor)) - b.bitsRead -= 32 - return - } - - b.bitsRead -= uint8(8 * b.cursor) - for b.cursor > 0 { - b.cursor -= 1 - b.value = (b.value << 8) | uint64(b.in[b.cursor]) - } -} - -// finished returns true if all bits have been read from the bit stream. -func (b *bitReader) finished() bool { - return b.cursor == 0 && b.bitsRead >= 64 -} - -// overread returns true if more bits have been requested than is on the stream. -func (b *bitReader) overread() bool { - return b.bitsRead > 64 -} - -// remain returns the number of bits remaining. -func (b *bitReader) remain() uint { - return 8*uint(b.cursor) + 64 - uint(b.bitsRead) -} - -// close the bitstream and returns an error if out-of-buffer reads occurred. -func (b *bitReader) close() error { - // Release reference. - b.in = nil - b.cursor = 0 - if !b.finished() { - return fmt.Errorf("%d extra bits on block, should be 0", b.remain()) - } - if b.bitsRead > 64 { - return io.ErrUnexpectedEOF - } - return nil -} - -func highBits(val uint32) (n uint32) { - return uint32(bits.Len32(val) - 1) -} diff --git a/vendor/github.com/klauspost/compress/zstd/bitwriter.go b/vendor/github.com/klauspost/compress/zstd/bitwriter.go deleted file mode 100644 index 1952f175b..000000000 --- a/vendor/github.com/klauspost/compress/zstd/bitwriter.go +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2018 Klaus Post. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. - -package zstd - -// bitWriter will write bits. -// First bit will be LSB of the first byte of output. -type bitWriter struct { - bitContainer uint64 - nBits uint8 - out []byte -} - -// bitMask16 is bitmasks. Has extra to avoid bounds check. -var bitMask16 = [32]uint16{ - 0, 1, 3, 7, 0xF, 0x1F, - 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, - 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF, - 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, - 0xFFFF, 0xFFFF} /* up to 16 bits */ - -var bitMask32 = [32]uint32{ - 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, - 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, - 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF, - 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF, -} // up to 32 bits - -// addBits16NC will add up to 16 bits. -// It will not check if there is space for them, -// so the caller must ensure that it has flushed recently. -func (b *bitWriter) addBits16NC(value uint16, bits uint8) { - b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63) - b.nBits += bits -} - -// addBits32NC will add up to 31 bits. -// It will not check if there is space for them, -// so the caller must ensure that it has flushed recently. -func (b *bitWriter) addBits32NC(value uint32, bits uint8) { - b.bitContainer |= uint64(value&bitMask32[bits&31]) << (b.nBits & 63) - b.nBits += bits -} - -// addBits64NC will add up to 64 bits. -// There must be space for 32 bits. -func (b *bitWriter) addBits64NC(value uint64, bits uint8) { - if bits <= 31 { - b.addBits32Clean(uint32(value), bits) - return - } - b.addBits32Clean(uint32(value), 32) - b.flush32() - b.addBits32Clean(uint32(value>>32), bits-32) -} - -// addBits32Clean will add up to 32 bits. -// It will not check if there is space for them. -// The input must not contain more bits than specified. -func (b *bitWriter) addBits32Clean(value uint32, bits uint8) { - b.bitContainer |= uint64(value) << (b.nBits & 63) - b.nBits += bits -} - -// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated. -// It will not check if there is space for them, so the caller must ensure that it has flushed recently. -func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { - b.bitContainer |= uint64(value) << (b.nBits & 63) - b.nBits += bits -} - -// flush32 will flush out, so there are at least 32 bits available for writing. -func (b *bitWriter) flush32() { - if b.nBits < 32 { - return - } - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24)) - b.nBits -= 32 - b.bitContainer >>= 32 -} - -// flushAlign will flush remaining full bytes and align to next byte boundary. -func (b *bitWriter) flushAlign() { - nbBytes := (b.nBits + 7) >> 3 - for i := uint8(0); i < nbBytes; i++ { - b.out = append(b.out, byte(b.bitContainer>>(i*8))) - } - b.nBits = 0 - b.bitContainer = 0 -} - -// close will write the alignment bit and write the final byte(s) -// to the output. -func (b *bitWriter) close() { - // End mark - b.addBits16Clean(1, 1) - // flush until next byte. - b.flushAlign() -} - -// reset and continue writing by appending to out. -func (b *bitWriter) reset(out []byte) { - b.bitContainer = 0 - b.nBits = 0 - b.out = out -} diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go deleted file mode 100644 index 0dd742fd2..000000000 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ /dev/null @@ -1,712 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "errors" - "fmt" - "hash/crc32" - "io" - "sync" - - "github.com/klauspost/compress/huff0" - "github.com/klauspost/compress/zstd/internal/xxhash" -) - -type blockType uint8 - -//go:generate stringer -type=blockType,literalsBlockType,seqCompMode,tableIndex - -const ( - blockTypeRaw blockType = iota - blockTypeRLE - blockTypeCompressed - blockTypeReserved -) - -type literalsBlockType uint8 - -const ( - literalsBlockRaw literalsBlockType = iota - literalsBlockRLE - literalsBlockCompressed - literalsBlockTreeless -) - -const ( - // maxCompressedBlockSize is the biggest allowed compressed block size (128KB) - maxCompressedBlockSize = 128 << 10 - - compressedBlockOverAlloc = 16 - maxCompressedBlockSizeAlloc = 128<<10 + compressedBlockOverAlloc - - // Maximum possible block size (all Raw+Uncompressed). - maxBlockSize = (1 << 21) - 1 - - maxMatchLen = 131074 - maxSequences = 0x7f00 + 0xffff - - // We support slightly less than the reference decoder to be able to - // use ints on 32 bit archs. - maxOffsetBits = 30 -) - -var ( - huffDecoderPool = sync.Pool{New: func() interface{} { - return &huff0.Scratch{} - }} - - fseDecoderPool = sync.Pool{New: func() interface{} { - return &fseDecoder{} - }} -) - -type blockDec struct { - // Raw source data of the block. - data []byte - dataStorage []byte - - // Destination of the decoded data. - dst []byte - - // Buffer for literals data. - literalBuf []byte - - // Window size of the block. - WindowSize uint64 - - err error - - // Check against this crc, if hasCRC is true. - checkCRC uint32 - hasCRC bool - - // Frame to use for singlethreaded decoding. - // Should not be used by the decoder itself since parent may be another frame. - localFrame *frameDec - - sequence []seqVals - - async struct { - newHist *history - literals []byte - seqData []byte - seqSize int // Size of uncompressed sequences - fcs uint64 - } - - // Block is RLE, this is the size. - RLESize uint32 - - Type blockType - - // Is this the last block of a frame? - Last bool - - // Use less memory - lowMem bool -} - -func (b *blockDec) String() string { - if b == nil { - return "" - } - return fmt.Sprintf("Steam Size: %d, Type: %v, Last: %t, Window: %d", len(b.data), b.Type, b.Last, b.WindowSize) -} - -func newBlockDec(lowMem bool) *blockDec { - b := blockDec{ - lowMem: lowMem, - } - return &b -} - -// reset will reset the block. -// Input must be a start of a block and will be at the end of the block when returned. -func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { - b.WindowSize = windowSize - tmp, err := br.readSmall(3) - if err != nil { - println("Reading block header:", err) - return err - } - bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) - b.Last = bh&1 != 0 - b.Type = blockType((bh >> 1) & 3) - // find size. - cSize := int(bh >> 3) - maxSize := maxCompressedBlockSizeAlloc - switch b.Type { - case blockTypeReserved: - return ErrReservedBlockType - case blockTypeRLE: - if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) { - if debugDecoder { - printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b) - } - return ErrWindowSizeExceeded - } - b.RLESize = uint32(cSize) - if b.lowMem { - maxSize = cSize - } - cSize = 1 - case blockTypeCompressed: - if debugDecoder { - println("Data size on stream:", cSize) - } - b.RLESize = 0 - maxSize = maxCompressedBlockSizeAlloc - if windowSize < maxCompressedBlockSize && b.lowMem { - maxSize = int(windowSize) + compressedBlockOverAlloc - } - if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize { - if debugDecoder { - printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b) - } - return ErrCompressedSizeTooBig - } - // Empty compressed blocks must at least be 2 bytes - // for Literals_Block_Type and one for Sequences_Section_Header. - if cSize < 2 { - return ErrBlockTooSmall - } - case blockTypeRaw: - if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) { - if debugDecoder { - printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b) - } - return ErrWindowSizeExceeded - } - - b.RLESize = 0 - // We do not need a destination for raw blocks. - maxSize = -1 - default: - panic("Invalid block type") - } - - // Read block data. - if _, ok := br.(*byteBuf); !ok && cap(b.dataStorage) < cSize { - // byteBuf doesn't need a destination buffer. - if b.lowMem || cSize > maxCompressedBlockSize { - b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc) - } else { - b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc) - } - } - b.data, err = br.readBig(cSize, b.dataStorage) - if err != nil { - if debugDecoder { - println("Reading block:", err, "(", cSize, ")", len(b.data)) - printf("%T", br) - } - return err - } - if cap(b.dst) <= maxSize { - b.dst = make([]byte, 0, maxSize+1) - } - return nil -} - -// sendEOF will make the decoder send EOF on this frame. -func (b *blockDec) sendErr(err error) { - b.Last = true - b.Type = blockTypeReserved - b.err = err -} - -// Close will release resources. -// Closed blockDec cannot be reset. -func (b *blockDec) Close() { -} - -// decodeBuf -func (b *blockDec) decodeBuf(hist *history) error { - switch b.Type { - case blockTypeRLE: - if cap(b.dst) < int(b.RLESize) { - if b.lowMem { - b.dst = make([]byte, b.RLESize) - } else { - b.dst = make([]byte, maxCompressedBlockSize) - } - } - b.dst = b.dst[:b.RLESize] - v := b.data[0] - for i := range b.dst { - b.dst[i] = v - } - hist.appendKeep(b.dst) - return nil - case blockTypeRaw: - hist.appendKeep(b.data) - return nil - case blockTypeCompressed: - saved := b.dst - // Append directly to history - if hist.ignoreBuffer == 0 { - b.dst = hist.b - hist.b = nil - } else { - b.dst = b.dst[:0] - } - err := b.decodeCompressed(hist) - if debugDecoder { - println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err) - } - if hist.ignoreBuffer == 0 { - hist.b = b.dst - b.dst = saved - } else { - hist.appendKeep(b.dst) - } - return err - case blockTypeReserved: - // Used for returning errors. - return b.err - default: - panic("Invalid block type") - } -} - -func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err error) { - // There must be at least one byte for Literals_Block_Type and one for Sequences_Section_Header - if len(in) < 2 { - return in, ErrBlockTooSmall - } - - litType := literalsBlockType(in[0] & 3) - var litRegenSize int - var litCompSize int - sizeFormat := (in[0] >> 2) & 3 - var fourStreams bool - var literals []byte - switch litType { - case literalsBlockRaw, literalsBlockRLE: - switch sizeFormat { - case 0, 2: - // Regenerated_Size uses 5 bits (0-31). Literals_Section_Header uses 1 byte. - litRegenSize = int(in[0] >> 3) - in = in[1:] - case 1: - // Regenerated_Size uses 12 bits (0-4095). Literals_Section_Header uses 2 bytes. - litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) - in = in[2:] - case 3: - // Regenerated_Size uses 20 bits (0-1048575). Literals_Section_Header uses 3 bytes. - if len(in) < 3 { - println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) - return in, ErrBlockTooSmall - } - litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + (int(in[2]) << 12) - in = in[3:] - } - case literalsBlockCompressed, literalsBlockTreeless: - switch sizeFormat { - case 0, 1: - // Both Regenerated_Size and Compressed_Size use 10 bits (0-1023). - if len(in) < 3 { - println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) - return in, ErrBlockTooSmall - } - n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) - litRegenSize = int(n & 1023) - litCompSize = int(n >> 10) - fourStreams = sizeFormat == 1 - in = in[3:] - case 2: - fourStreams = true - if len(in) < 4 { - println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) - return in, ErrBlockTooSmall - } - n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) - litRegenSize = int(n & 16383) - litCompSize = int(n >> 14) - in = in[4:] - case 3: - fourStreams = true - if len(in) < 5 { - println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) - return in, ErrBlockTooSmall - } - n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + (uint64(in[4]) << 28) - litRegenSize = int(n & 262143) - litCompSize = int(n >> 18) - in = in[5:] - } - } - if debugDecoder { - println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams) - } - if litRegenSize > int(b.WindowSize) || litRegenSize > maxCompressedBlockSize { - return in, ErrWindowSizeExceeded - } - - switch litType { - case literalsBlockRaw: - if len(in) < litRegenSize { - println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litRegenSize) - return in, ErrBlockTooSmall - } - literals = in[:litRegenSize] - in = in[litRegenSize:] - //printf("Found %d uncompressed literals\n", litRegenSize) - case literalsBlockRLE: - if len(in) < 1 { - println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", 1) - return in, ErrBlockTooSmall - } - if cap(b.literalBuf) < litRegenSize { - if b.lowMem { - b.literalBuf = make([]byte, litRegenSize, litRegenSize+compressedBlockOverAlloc) - } else { - b.literalBuf = make([]byte, litRegenSize, maxCompressedBlockSize+compressedBlockOverAlloc) - } - } - literals = b.literalBuf[:litRegenSize] - v := in[0] - for i := range literals { - literals[i] = v - } - in = in[1:] - if debugDecoder { - printf("Found %d RLE compressed literals\n", litRegenSize) - } - case literalsBlockTreeless: - if len(in) < litCompSize { - println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize) - return in, ErrBlockTooSmall - } - // Store compressed literals, so we defer decoding until we get history. - literals = in[:litCompSize] - in = in[litCompSize:] - if debugDecoder { - printf("Found %d compressed literals\n", litCompSize) - } - huff := hist.huffTree - if huff == nil { - return in, errors.New("literal block was treeless, but no history was defined") - } - // Ensure we have space to store it. - if cap(b.literalBuf) < litRegenSize { - if b.lowMem { - b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc) - } else { - b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc) - } - } - var err error - // Use our out buffer. - huff.MaxDecodedSize = litRegenSize - if fourStreams { - literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) - } else { - literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) - } - // Make sure we don't leak our literals buffer - if err != nil { - println("decompressing literals:", err) - return in, err - } - if len(literals) != litRegenSize { - return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) - } - - case literalsBlockCompressed: - if len(in) < litCompSize { - println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize) - return in, ErrBlockTooSmall - } - literals = in[:litCompSize] - in = in[litCompSize:] - // Ensure we have space to store it. - if cap(b.literalBuf) < litRegenSize { - if b.lowMem { - b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc) - } else { - b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc) - } - } - huff := hist.huffTree - if huff == nil || (hist.dict != nil && huff == hist.dict.litEnc) { - huff = huffDecoderPool.Get().(*huff0.Scratch) - if huff == nil { - huff = &huff0.Scratch{} - } - } - var err error - if debugDecoder { - println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals)) - } - huff, literals, err = huff0.ReadTable(literals, huff) - if err != nil { - println("reading huffman table:", err) - return in, err - } - hist.huffTree = huff - huff.MaxDecodedSize = litRegenSize - // Use our out buffer. - if fourStreams { - literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) - } else { - literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) - } - if err != nil { - println("decoding compressed literals:", err) - return in, err - } - // Make sure we don't leak our literals buffer - if len(literals) != litRegenSize { - return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) - } - // Re-cap to get extra size. - literals = b.literalBuf[:len(literals)] - if debugDecoder { - printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize) - } - } - hist.decoders.literals = literals - return in, nil -} - -// decodeCompressed will start decompressing a block. -func (b *blockDec) decodeCompressed(hist *history) error { - in := b.data - in, err := b.decodeLiterals(in, hist) - if err != nil { - return err - } - err = b.prepareSequences(in, hist) - if err != nil { - return err - } - if hist.decoders.nSeqs == 0 { - b.dst = append(b.dst, hist.decoders.literals...) - return nil - } - before := len(hist.decoders.out) - err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:]) - if err != nil { - return err - } - if hist.decoders.maxSyncLen > 0 { - hist.decoders.maxSyncLen += uint64(before) - hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out)) - } - b.dst = hist.decoders.out - hist.recentOffsets = hist.decoders.prevOffset - return nil -} - -func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) { - if debugDecoder { - printf("prepareSequences: %d byte(s) input\n", len(in)) - } - // Decode Sequences - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section - if len(in) < 1 { - return ErrBlockTooSmall - } - var nSeqs int - seqHeader := in[0] - switch { - case seqHeader < 128: - nSeqs = int(seqHeader) - in = in[1:] - case seqHeader < 255: - if len(in) < 2 { - return ErrBlockTooSmall - } - nSeqs = int(seqHeader-128)<<8 | int(in[1]) - in = in[2:] - case seqHeader == 255: - if len(in) < 3 { - return ErrBlockTooSmall - } - nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8) - in = in[3:] - } - if nSeqs == 0 && len(in) != 0 { - // When no sequences, there should not be any more data... - if debugDecoder { - printf("prepareSequences: 0 sequences, but %d byte(s) left on stream\n", len(in)) - } - return ErrUnexpectedBlockSize - } - - var seqs = &hist.decoders - seqs.nSeqs = nSeqs - if nSeqs > 0 { - if len(in) < 1 { - return ErrBlockTooSmall - } - br := byteReader{b: in, off: 0} - compMode := br.Uint8() - br.advance(1) - if debugDecoder { - printf("Compression modes: 0b%b", compMode) - } - if compMode&3 != 0 { - return errors.New("corrupt block: reserved bits not zero") - } - for i := uint(0); i < 3; i++ { - mode := seqCompMode((compMode >> (6 - i*2)) & 3) - if debugDecoder { - println("Table", tableIndex(i), "is", mode) - } - var seq *sequenceDec - switch tableIndex(i) { - case tableLiteralLengths: - seq = &seqs.litLengths - case tableOffsets: - seq = &seqs.offsets - case tableMatchLengths: - seq = &seqs.matchLengths - default: - panic("unknown table") - } - switch mode { - case compModePredefined: - if seq.fse != nil && !seq.fse.preDefined { - fseDecoderPool.Put(seq.fse) - } - seq.fse = &fsePredef[i] - case compModeRLE: - if br.remain() < 1 { - return ErrBlockTooSmall - } - v := br.Uint8() - br.advance(1) - if seq.fse == nil || seq.fse.preDefined { - seq.fse = fseDecoderPool.Get().(*fseDecoder) - } - symb, err := decSymbolValue(v, symbolTableX[i]) - if err != nil { - printf("RLE Transform table (%v) error: %v", tableIndex(i), err) - return err - } - seq.fse.setRLE(symb) - if debugDecoder { - printf("RLE set to 0x%x, code: %v", symb, v) - } - case compModeFSE: - if debugDecoder { - println("Reading table for", tableIndex(i)) - } - if seq.fse == nil || seq.fse.preDefined { - seq.fse = fseDecoderPool.Get().(*fseDecoder) - } - err := seq.fse.readNCount(&br, uint16(maxTableSymbol[i])) - if err != nil { - println("Read table error:", err) - return err - } - err = seq.fse.transform(symbolTableX[i]) - if err != nil { - println("Transform table error:", err) - return err - } - if debugDecoder { - println("Read table ok", "symbolLen:", seq.fse.symbolLen) - } - case compModeRepeat: - seq.repeat = true - } - if br.overread() { - return io.ErrUnexpectedEOF - } - } - in = br.unread() - } - if debugDecoder { - println("Literals:", len(seqs.literals), "hash:", xxhash.Sum64(seqs.literals), "and", seqs.nSeqs, "sequences.") - } - - if nSeqs == 0 { - if len(b.sequence) > 0 { - b.sequence = b.sequence[:0] - } - return nil - } - br := seqs.br - if br == nil { - br = &bitReader{} - } - if err := br.init(in); err != nil { - return err - } - - if err := seqs.initialize(br, hist, b.dst); err != nil { - println("initializing sequences:", err) - return err - } - - return nil -} - -func (b *blockDec) decodeSequences(hist *history) error { - if cap(b.sequence) < hist.decoders.nSeqs { - if b.lowMem { - b.sequence = make([]seqVals, 0, hist.decoders.nSeqs) - } else { - b.sequence = make([]seqVals, 0, 0x7F00+0xffff) - } - } - b.sequence = b.sequence[:hist.decoders.nSeqs] - if hist.decoders.nSeqs == 0 { - hist.decoders.seqSize = len(hist.decoders.literals) - return nil - } - hist.decoders.windowSize = hist.windowSize - hist.decoders.prevOffset = hist.recentOffsets - - err := hist.decoders.decode(b.sequence) - hist.recentOffsets = hist.decoders.prevOffset - return err -} - -func (b *blockDec) executeSequences(hist *history) error { - hbytes := hist.b - if len(hbytes) > hist.windowSize { - hbytes = hbytes[len(hbytes)-hist.windowSize:] - // We do not need history anymore. - if hist.dict != nil { - hist.dict.content = nil - } - } - hist.decoders.windowSize = hist.windowSize - hist.decoders.out = b.dst[:0] - err := hist.decoders.execute(b.sequence, hbytes) - if err != nil { - return err - } - return b.updateHistory(hist) -} - -func (b *blockDec) updateHistory(hist *history) error { - if len(b.data) > maxCompressedBlockSize { - return fmt.Errorf("compressed block size too large (%d)", len(b.data)) - } - // Set output and release references. - b.dst = hist.decoders.out - hist.recentOffsets = hist.decoders.prevOffset - - if b.Last { - // if last block we don't care about history. - println("Last block, no history returned") - hist.b = hist.b[:0] - return nil - } else { - hist.append(b.dst) - if debugDecoder { - println("Finished block with ", len(b.sequence), "sequences. Added", len(b.dst), "to history, now length", len(hist.b)) - } - } - hist.decoders.out, hist.decoders.literals = nil, nil - - return nil -} diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go deleted file mode 100644 index fd35ea148..000000000 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ /dev/null @@ -1,892 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "errors" - "fmt" - "math" - "math/bits" - "slices" - - "github.com/klauspost/compress/huff0" -) - -type blockEnc struct { - size int - literals []byte - sequences []seq - coders seqCoders - litEnc *huff0.Scratch - dictLitEnc *huff0.Scratch - wr bitWriter - - extraLits int - output []byte - recentOffsets [3]uint32 - prevRecentOffsets [3]uint32 - - last bool - lowMem bool -} - -// init should be used once the block has been created. -// If called more than once, the effect is the same as calling reset. -func (b *blockEnc) init() { - if b.lowMem { - // 1K literals - if cap(b.literals) < 1<<10 { - b.literals = make([]byte, 0, 1<<10) - } - const defSeqs = 20 - if cap(b.sequences) < defSeqs { - b.sequences = make([]seq, 0, defSeqs) - } - // 1K - if cap(b.output) < 1<<10 { - b.output = make([]byte, 0, 1<<10) - } - } else { - if cap(b.literals) < maxCompressedBlockSize { - b.literals = make([]byte, 0, maxCompressedBlockSize) - } - const defSeqs = 2000 - if cap(b.sequences) < defSeqs { - b.sequences = make([]seq, 0, defSeqs) - } - if cap(b.output) < maxCompressedBlockSize { - b.output = make([]byte, 0, maxCompressedBlockSize) - } - } - - if b.coders.mlEnc == nil { - b.coders.mlEnc = &fseEncoder{} - b.coders.mlPrev = &fseEncoder{} - b.coders.ofEnc = &fseEncoder{} - b.coders.ofPrev = &fseEncoder{} - b.coders.llEnc = &fseEncoder{} - b.coders.llPrev = &fseEncoder{} - } - b.litEnc = &huff0.Scratch{WantLogLess: 4} - b.reset(nil) -} - -// initNewEncode can be used to reset offsets and encoders to the initial state. -func (b *blockEnc) initNewEncode() { - b.recentOffsets = [3]uint32{1, 4, 8} - b.litEnc.Reuse = huff0.ReusePolicyNone - b.coders.setPrev(nil, nil, nil) -} - -// reset will reset the block for a new encode, but in the same stream, -// meaning that state will be carried over, but the block content is reset. -// If a previous block is provided, the recent offsets are carried over. -func (b *blockEnc) reset(prev *blockEnc) { - b.extraLits = 0 - b.literals = b.literals[:0] - b.size = 0 - b.sequences = b.sequences[:0] - b.output = b.output[:0] - b.last = false - if prev != nil { - b.recentOffsets = prev.prevRecentOffsets - } - b.dictLitEnc = nil -} - -// reset will reset the block for a new encode, but in the same stream, -// meaning that state will be carried over, but the block content is reset. -// If a previous block is provided, the recent offsets are carried over. -func (b *blockEnc) swapEncoders(prev *blockEnc) { - b.coders.swap(&prev.coders) - b.litEnc, prev.litEnc = prev.litEnc, b.litEnc -} - -// blockHeader contains the information for a block header. -type blockHeader uint32 - -// setLast sets the 'last' indicator on a block. -func (h *blockHeader) setLast(b bool) { - if b { - *h = *h | 1 - } else { - const mask = (1 << 24) - 2 - *h = *h & mask - } -} - -// setSize will store the compressed size of a block. -func (h *blockHeader) setSize(v uint32) { - const mask = 7 - *h = (*h)&mask | blockHeader(v<<3) -} - -// setType sets the block type. -func (h *blockHeader) setType(t blockType) { - const mask = 1 | (((1 << 24) - 1) ^ 7) - *h = (*h & mask) | blockHeader(t<<1) -} - -// appendTo will append the block header to a slice. -func (h blockHeader) appendTo(b []byte) []byte { - return append(b, uint8(h), uint8(h>>8), uint8(h>>16)) -} - -// String returns a string representation of the block. -func (h blockHeader) String() string { - return fmt.Sprintf("Type: %d, Size: %d, Last:%t", (h>>1)&3, h>>3, h&1 == 1) -} - -// literalsHeader contains literals header information. -type literalsHeader uint64 - -// setType can be used to set the type of literal block. -func (h *literalsHeader) setType(t literalsBlockType) { - const mask = math.MaxUint64 - 3 - *h = (*h & mask) | literalsHeader(t) -} - -// setSize can be used to set a single size, for uncompressed and RLE content. -func (h *literalsHeader) setSize(regenLen int) { - inBits := bits.Len32(uint32(regenLen)) - // Only retain 2 bits - const mask = 3 - lh := uint64(*h & mask) - switch { - case inBits < 5: - lh |= (uint64(regenLen) << 3) | (1 << 60) - if debugEncoder { - got := int(lh>>3) & 0xff - if got != regenLen { - panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)")) - } - } - case inBits < 12: - lh |= (1 << 2) | (uint64(regenLen) << 4) | (2 << 60) - case inBits < 20: - lh |= (3 << 2) | (uint64(regenLen) << 4) | (3 << 60) - default: - panic(fmt.Errorf("internal error: block too big (%d)", regenLen)) - } - *h = literalsHeader(lh) -} - -// setSizes will set the size of a compressed literals section and the input length. -func (h *literalsHeader) setSizes(compLen, inLen int, single bool) { - compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen)) - // Only retain 2 bits - const mask = 3 - lh := uint64(*h & mask) - switch { - case compBits <= 10 && inBits <= 10: - if !single { - lh |= 1 << 2 - } - lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60) - if debugEncoder { - const mmask = (1 << 24) - 1 - n := (lh >> 4) & mmask - if int(n&1023) != inLen { - panic(fmt.Sprint("regensize:", int(n&1023), "!=", inLen, inBits)) - } - if int(n>>10) != compLen { - panic(fmt.Sprint("compsize:", int(n>>10), "!=", compLen, compBits)) - } - } - case compBits <= 14 && inBits <= 14: - lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60) - if single { - panic("single stream used with more than 10 bits length.") - } - case compBits <= 18 && inBits <= 18: - lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60) - if single { - panic("single stream used with more than 10 bits length.") - } - default: - panic("internal error: block too big") - } - *h = literalsHeader(lh) -} - -// appendTo will append the literals header to a byte slice. -func (h literalsHeader) appendTo(b []byte) []byte { - size := uint8(h >> 60) - switch size { - case 1: - b = append(b, uint8(h)) - case 2: - b = append(b, uint8(h), uint8(h>>8)) - case 3: - b = append(b, uint8(h), uint8(h>>8), uint8(h>>16)) - case 4: - b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24)) - case 5: - b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24), uint8(h>>32)) - default: - panic(fmt.Errorf("internal error: literalsHeader has invalid size (%d)", size)) - } - return b -} - -// size returns the output size with currently set values. -func (h literalsHeader) size() int { - return int(h >> 60) -} - -func (h literalsHeader) String() string { - return fmt.Sprintf("Type: %d, SizeFormat: %d, Size: 0x%d, Bytes:%d", literalsBlockType(h&3), (h>>2)&3, h&((1<<60)-1)>>4, h>>60) -} - -// pushOffsets will push the recent offsets to the backup store. -func (b *blockEnc) pushOffsets() { - b.prevRecentOffsets = b.recentOffsets -} - -// pushOffsets will push the recent offsets to the backup store. -func (b *blockEnc) popOffsets() { - b.recentOffsets = b.prevRecentOffsets -} - -// matchOffset will adjust recent offsets and return the adjusted one, -// if it matches a previous offset. -func (b *blockEnc) matchOffset(offset, lits uint32) uint32 { - // Check if offset is one of the recent offsets. - // Adjusts the output offset accordingly. - // Gives a tiny bit of compression, typically around 1%. - if true { - if lits > 0 { - switch offset { - case b.recentOffsets[0]: - offset = 1 - case b.recentOffsets[1]: - b.recentOffsets[1] = b.recentOffsets[0] - b.recentOffsets[0] = offset - offset = 2 - case b.recentOffsets[2]: - b.recentOffsets[2] = b.recentOffsets[1] - b.recentOffsets[1] = b.recentOffsets[0] - b.recentOffsets[0] = offset - offset = 3 - default: - b.recentOffsets[2] = b.recentOffsets[1] - b.recentOffsets[1] = b.recentOffsets[0] - b.recentOffsets[0] = offset - offset += 3 - } - } else { - switch offset { - case b.recentOffsets[1]: - b.recentOffsets[1] = b.recentOffsets[0] - b.recentOffsets[0] = offset - offset = 1 - case b.recentOffsets[2]: - b.recentOffsets[2] = b.recentOffsets[1] - b.recentOffsets[1] = b.recentOffsets[0] - b.recentOffsets[0] = offset - offset = 2 - case b.recentOffsets[0] - 1: - b.recentOffsets[2] = b.recentOffsets[1] - b.recentOffsets[1] = b.recentOffsets[0] - b.recentOffsets[0] = offset - offset = 3 - default: - b.recentOffsets[2] = b.recentOffsets[1] - b.recentOffsets[1] = b.recentOffsets[0] - b.recentOffsets[0] = offset - offset += 3 - } - } - } else { - offset += 3 - } - return offset -} - -// encodeRaw can be used to set the output to a raw representation of supplied bytes. -func (b *blockEnc) encodeRaw(a []byte) { - var bh blockHeader - bh.setLast(b.last) - bh.setSize(uint32(len(a))) - bh.setType(blockTypeRaw) - b.output = bh.appendTo(b.output[:0]) - b.output = append(b.output, a...) - if debugEncoder { - println("Adding RAW block, length", len(a), "last:", b.last) - } -} - -// encodeRaw can be used to set the output to a raw representation of supplied bytes. -func (b *blockEnc) encodeRawTo(dst, src []byte) []byte { - var bh blockHeader - bh.setLast(b.last) - bh.setSize(uint32(len(src))) - bh.setType(blockTypeRaw) - dst = bh.appendTo(dst) - dst = append(dst, src...) - if debugEncoder { - println("Adding RAW block, length", len(src), "last:", b.last) - } - return dst -} - -// encodeLits can be used if the block is only litLen. -func (b *blockEnc) encodeLits(lits []byte, raw bool) error { - var bh blockHeader - bh.setLast(b.last) - bh.setSize(uint32(len(lits))) - - // Don't compress extremely small blocks - if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw { - if debugEncoder { - println("Adding RAW block, length", len(lits), "last:", b.last) - } - bh.setType(blockTypeRaw) - b.output = bh.appendTo(b.output) - b.output = append(b.output, lits...) - return nil - } - - var ( - out []byte - reUsed, single bool - err error - ) - if b.dictLitEnc != nil { - b.litEnc.TransferCTable(b.dictLitEnc) - b.litEnc.Reuse = huff0.ReusePolicyAllow - b.dictLitEnc = nil - } - if len(lits) >= 1024 { - // Use 4 Streams. - out, reUsed, err = huff0.Compress4X(lits, b.litEnc) - } else if len(lits) > 16 { - // Use 1 stream - single = true - out, reUsed, err = huff0.Compress1X(lits, b.litEnc) - } else { - err = huff0.ErrIncompressible - } - if err == nil && len(out)+5 > len(lits) { - // If we are close, we may still be worse or equal to raw. - var lh literalsHeader - lh.setSizes(len(out), len(lits), single) - if len(out)+lh.size() >= len(lits) { - err = huff0.ErrIncompressible - } - } - switch err { - case huff0.ErrIncompressible: - if debugEncoder { - println("Adding RAW block, length", len(lits), "last:", b.last) - } - bh.setType(blockTypeRaw) - b.output = bh.appendTo(b.output) - b.output = append(b.output, lits...) - return nil - case huff0.ErrUseRLE: - if debugEncoder { - println("Adding RLE block, length", len(lits)) - } - bh.setType(blockTypeRLE) - b.output = bh.appendTo(b.output) - b.output = append(b.output, lits[0]) - return nil - case nil: - default: - return err - } - // Compressed... - // Now, allow reuse - b.litEnc.Reuse = huff0.ReusePolicyAllow - bh.setType(blockTypeCompressed) - var lh literalsHeader - if reUsed { - if debugEncoder { - println("Reused tree, compressed to", len(out)) - } - lh.setType(literalsBlockTreeless) - } else { - if debugEncoder { - println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable)) - } - lh.setType(literalsBlockCompressed) - } - // Set sizes - lh.setSizes(len(out), len(lits), single) - bh.setSize(uint32(len(out) + lh.size() + 1)) - - // Write block headers. - b.output = bh.appendTo(b.output) - b.output = lh.appendTo(b.output) - // Add compressed data. - b.output = append(b.output, out...) - // No sequences. - b.output = append(b.output, 0) - return nil -} - -// encodeRLE will encode an RLE block. -func (b *blockEnc) encodeRLE(val byte, length uint32) { - var bh blockHeader - bh.setLast(b.last) - bh.setSize(length) - bh.setType(blockTypeRLE) - b.output = bh.appendTo(b.output) - b.output = append(b.output, val) -} - -// fuzzFseEncoder can be used to fuzz the FSE encoder. -func fuzzFseEncoder(data []byte) int { - if len(data) > maxSequences || len(data) < 2 { - return 0 - } - enc := fseEncoder{} - hist := enc.Histogram() - maxSym := uint8(0) - for i, v := range data { - v = v & 63 - data[i] = v - hist[v]++ - if v > maxSym { - maxSym = v - } - } - if maxSym == 0 { - // All 0 - return 0 - } - cnt := int(slices.Max(hist[:maxSym])) - if cnt == len(data) { - // RLE - return 0 - } - enc.HistogramFinished(maxSym, cnt) - err := enc.normalizeCount(len(data)) - if err != nil { - return 0 - } - _, err = enc.writeCount(nil) - if err != nil { - panic(err) - } - return 1 -} - -// encode will encode the block and append the output in b.output. -// Previous offset codes must be pushed if more blocks are expected. -func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { - if len(b.sequences) == 0 { - return b.encodeLits(b.literals, rawAllLits) - } - if len(b.sequences) == 1 && len(org) > 0 && len(b.literals) <= 1 { - // Check common RLE cases. - seq := b.sequences[0] - if seq.litLen == uint32(len(b.literals)) && seq.offset-3 == 1 { - // Offset == 1 and 0 or 1 literals. - b.encodeRLE(org[0], b.sequences[0].matchLen+zstdMinMatch+seq.litLen) - return nil - } - } - - // We want some difference to at least account for the headers. - saved := b.size - len(b.literals) - (b.size >> 6) - if saved < 16 { - if org == nil { - return errIncompressible - } - b.popOffsets() - return b.encodeLits(org, rawAllLits) - } - - var bh blockHeader - var lh literalsHeader - bh.setLast(b.last) - bh.setType(blockTypeCompressed) - // Store offset of the block header. Needed when we know the size. - bhOffset := len(b.output) - b.output = bh.appendTo(b.output) - - var ( - out []byte - reUsed, single bool - err error - ) - if b.dictLitEnc != nil { - b.litEnc.TransferCTable(b.dictLitEnc) - b.litEnc.Reuse = huff0.ReusePolicyAllow - b.dictLitEnc = nil - } - if len(b.literals) >= 1024 && !raw { - // Use 4 Streams. - out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) - } else if len(b.literals) > 16 && !raw { - // Use 1 stream - single = true - out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc) - } else { - err = huff0.ErrIncompressible - } - - if err == nil && len(out)+5 > len(b.literals) { - // If we are close, we may still be worse or equal to raw. - var lh literalsHeader - lh.setSize(len(b.literals)) - szRaw := lh.size() - lh.setSizes(len(out), len(b.literals), single) - szComp := lh.size() - if len(out)+szComp >= len(b.literals)+szRaw { - err = huff0.ErrIncompressible - } - } - switch err { - case huff0.ErrIncompressible: - lh.setType(literalsBlockRaw) - lh.setSize(len(b.literals)) - b.output = lh.appendTo(b.output) - b.output = append(b.output, b.literals...) - if debugEncoder { - println("Adding literals RAW, length", len(b.literals)) - } - case huff0.ErrUseRLE: - lh.setType(literalsBlockRLE) - lh.setSize(len(b.literals)) - b.output = lh.appendTo(b.output) - b.output = append(b.output, b.literals[0]) - if debugEncoder { - println("Adding literals RLE") - } - case nil: - // Compressed litLen... - if reUsed { - if debugEncoder { - println("reused tree") - } - lh.setType(literalsBlockTreeless) - } else { - if debugEncoder { - println("new tree, size:", len(b.litEnc.OutTable)) - } - lh.setType(literalsBlockCompressed) - if debugEncoder { - _, _, err := huff0.ReadTable(out, nil) - if err != nil { - panic(err) - } - } - } - lh.setSizes(len(out), len(b.literals), single) - if debugEncoder { - printf("Compressed %d literals to %d bytes", len(b.literals), len(out)) - println("Adding literal header:", lh) - } - b.output = lh.appendTo(b.output) - b.output = append(b.output, out...) - b.litEnc.Reuse = huff0.ReusePolicyAllow - if debugEncoder { - println("Adding literals compressed") - } - default: - if debugEncoder { - println("Adding literals ERROR:", err) - } - return err - } - // Sequence compression - - // Write the number of sequences - switch { - case len(b.sequences) < 128: - b.output = append(b.output, uint8(len(b.sequences))) - case len(b.sequences) < 0x7f00: // TODO: this could be wrong - n := len(b.sequences) - b.output = append(b.output, 128+uint8(n>>8), uint8(n)) - default: - n := len(b.sequences) - 0x7f00 - b.output = append(b.output, 255, uint8(n), uint8(n>>8)) - } - if debugEncoder { - println("Encoding", len(b.sequences), "sequences") - } - b.genCodes() - llEnc := b.coders.llEnc - ofEnc := b.coders.ofEnc - mlEnc := b.coders.mlEnc - err = llEnc.normalizeCount(len(b.sequences)) - if err != nil { - return err - } - err = ofEnc.normalizeCount(len(b.sequences)) - if err != nil { - return err - } - err = mlEnc.normalizeCount(len(b.sequences)) - if err != nil { - return err - } - - // Choose the best compression mode for each type. - // Will evaluate the new vs predefined and previous. - chooseComp := func(cur, prev, preDef *fseEncoder) (*fseEncoder, seqCompMode) { - // See if predefined/previous is better - hist := cur.count[:cur.symbolLen] - nSize := cur.approxSize(hist) + cur.maxHeaderSize() - predefSize := preDef.approxSize(hist) - prevSize := prev.approxSize(hist) - - // Add a small penalty for new encoders. - // Don't bother with extremely small (<2 byte gains). - nSize = nSize + (nSize+2*8*16)>>4 - switch { - case predefSize <= prevSize && predefSize <= nSize || forcePreDef: - if debugEncoder { - println("Using predefined", predefSize>>3, "<=", nSize>>3) - } - return preDef, compModePredefined - case prevSize <= nSize: - if debugEncoder { - println("Using previous", prevSize>>3, "<=", nSize>>3) - } - return prev, compModeRepeat - default: - if debugEncoder { - println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes") - println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen]) - } - return cur, compModeFSE - } - } - - // Write compression mode - var mode uint8 - if llEnc.useRLE { - mode |= uint8(compModeRLE) << 6 - llEnc.setRLE(b.sequences[0].llCode) - if debugEncoder { - println("llEnc.useRLE") - } - } else { - var m seqCompMode - llEnc, m = chooseComp(llEnc, b.coders.llPrev, &fsePredefEnc[tableLiteralLengths]) - mode |= uint8(m) << 6 - } - if ofEnc.useRLE { - mode |= uint8(compModeRLE) << 4 - ofEnc.setRLE(b.sequences[0].ofCode) - if debugEncoder { - println("ofEnc.useRLE") - } - } else { - var m seqCompMode - ofEnc, m = chooseComp(ofEnc, b.coders.ofPrev, &fsePredefEnc[tableOffsets]) - mode |= uint8(m) << 4 - } - - if mlEnc.useRLE { - mode |= uint8(compModeRLE) << 2 - mlEnc.setRLE(b.sequences[0].mlCode) - if debugEncoder { - println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen) - } - } else { - var m seqCompMode - mlEnc, m = chooseComp(mlEnc, b.coders.mlPrev, &fsePredefEnc[tableMatchLengths]) - mode |= uint8(m) << 2 - } - b.output = append(b.output, mode) - if debugEncoder { - printf("Compression modes: 0b%b", mode) - } - b.output, err = llEnc.writeCount(b.output) - if err != nil { - return err - } - start := len(b.output) - b.output, err = ofEnc.writeCount(b.output) - if err != nil { - return err - } - if false { - println("block:", b.output[start:], "tablelog", ofEnc.actualTableLog, "maxcount:", ofEnc.maxCount) - fmt.Printf("selected TableLog: %d, Symbol length: %d\n", ofEnc.actualTableLog, ofEnc.symbolLen) - for i, v := range ofEnc.norm[:ofEnc.symbolLen] { - fmt.Printf("%3d: %5d -> %4d \n", i, ofEnc.count[i], v) - } - } - b.output, err = mlEnc.writeCount(b.output) - if err != nil { - return err - } - - // Maybe in block? - wr := &b.wr - wr.reset(b.output) - - var ll, of, ml cState - - // Current sequence - seq := len(b.sequences) - 1 - s := b.sequences[seq] - llEnc.setBits(llBitsTable[:]) - mlEnc.setBits(mlBitsTable[:]) - ofEnc.setBits(nil) - - llTT, ofTT, mlTT := llEnc.ct.symbolTT[:256], ofEnc.ct.symbolTT[:256], mlEnc.ct.symbolTT[:256] - - // We have 3 bounds checks here (and in the loop). - // Since we are iterating backwards it is kinda hard to avoid. - llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode] - ll.init(wr, &llEnc.ct, llB) - of.init(wr, &ofEnc.ct, ofB) - wr.flush32() - ml.init(wr, &mlEnc.ct, mlB) - - // Each of these lookups also generates a bounds check. - wr.addBits32NC(s.litLen, llB.outBits) - wr.addBits32NC(s.matchLen, mlB.outBits) - wr.flush32() - wr.addBits32NC(s.offset, ofB.outBits) - if debugSequences { - println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB) - } - seq-- - // Store sequences in reverse... - for seq >= 0 { - s = b.sequences[seq] - - ofB := ofTT[s.ofCode] - wr.flush32() // tablelog max is below 8 for each, so it will fill max 24 bits. - //of.encode(ofB) - nbBitsOut := (uint32(of.state) + ofB.deltaNbBits) >> 16 - dstState := int32(of.state>>(nbBitsOut&15)) + int32(ofB.deltaFindState) - wr.addBits16NC(of.state, uint8(nbBitsOut)) - of.state = of.stateTable[dstState] - - // Accumulate extra bits. - outBits := ofB.outBits & 31 - extraBits := uint64(s.offset & bitMask32[outBits]) - extraBitsN := outBits - - mlB := mlTT[s.mlCode] - //ml.encode(mlB) - nbBitsOut = (uint32(ml.state) + mlB.deltaNbBits) >> 16 - dstState = int32(ml.state>>(nbBitsOut&15)) + int32(mlB.deltaFindState) - wr.addBits16NC(ml.state, uint8(nbBitsOut)) - ml.state = ml.stateTable[dstState] - - outBits = mlB.outBits & 31 - extraBits = extraBits<> 16 - dstState = int32(ll.state>>(nbBitsOut&15)) + int32(llB.deltaFindState) - wr.addBits16NC(ll.state, uint8(nbBitsOut)) - ll.state = ll.stateTable[dstState] - - outBits = llB.outBits & 31 - extraBits = extraBits<= b.size { - // Discard and encode as raw block. - b.output = b.encodeRawTo(b.output[:bhOffset], org) - b.popOffsets() - b.litEnc.Reuse = huff0.ReusePolicyNone - return nil - } - - // Size is output minus block header. - bh.setSize(uint32(len(b.output)-bhOffset) - 3) - if debugEncoder { - println("Rewriting block header", bh) - } - _ = bh.appendTo(b.output[bhOffset:bhOffset]) - b.coders.setPrev(llEnc, mlEnc, ofEnc) - return nil -} - -var errIncompressible = errors.New("incompressible") - -func (b *blockEnc) genCodes() { - if len(b.sequences) == 0 { - // nothing to do - return - } - if len(b.sequences) > math.MaxUint16 { - panic("can only encode up to 64K sequences") - } - // No bounds checks after here: - llH := b.coders.llEnc.Histogram() - ofH := b.coders.ofEnc.Histogram() - mlH := b.coders.mlEnc.Histogram() - for i := range llH { - llH[i] = 0 - } - for i := range ofH { - ofH[i] = 0 - } - for i := range mlH { - mlH[i] = 0 - } - - var llMax, ofMax, mlMax uint8 - for i := range b.sequences { - seq := &b.sequences[i] - v := llCode(seq.litLen) - seq.llCode = v - llH[v]++ - if v > llMax { - llMax = v - } - - v = ofCode(seq.offset) - seq.ofCode = v - ofH[v]++ - if v > ofMax { - ofMax = v - } - - v = mlCode(seq.matchLen) - seq.mlCode = v - mlH[v]++ - if v > mlMax { - mlMax = v - if debugAsserts && mlMax > maxMatchLengthSymbol { - panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen)) - } - } - } - if debugAsserts && mlMax > maxMatchLengthSymbol { - panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax)) - } - if debugAsserts && ofMax > maxOffsetBits { - panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax)) - } - if debugAsserts && llMax > maxLiteralLengthSymbol { - panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax)) - } - - b.coders.mlEnc.HistogramFinished(mlMax, int(slices.Max(mlH[:mlMax+1]))) - b.coders.ofEnc.HistogramFinished(ofMax, int(slices.Max(ofH[:ofMax+1]))) - b.coders.llEnc.HistogramFinished(llMax, int(slices.Max(llH[:llMax+1]))) -} diff --git a/vendor/github.com/klauspost/compress/zstd/blocktype_string.go b/vendor/github.com/klauspost/compress/zstd/blocktype_string.go deleted file mode 100644 index 01a01e486..000000000 --- a/vendor/github.com/klauspost/compress/zstd/blocktype_string.go +++ /dev/null @@ -1,85 +0,0 @@ -// Code generated by "stringer -type=blockType,literalsBlockType,seqCompMode,tableIndex"; DO NOT EDIT. - -package zstd - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[blockTypeRaw-0] - _ = x[blockTypeRLE-1] - _ = x[blockTypeCompressed-2] - _ = x[blockTypeReserved-3] -} - -const _blockType_name = "blockTypeRawblockTypeRLEblockTypeCompressedblockTypeReserved" - -var _blockType_index = [...]uint8{0, 12, 24, 43, 60} - -func (i blockType) String() string { - if i >= blockType(len(_blockType_index)-1) { - return "blockType(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _blockType_name[_blockType_index[i]:_blockType_index[i+1]] -} -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[literalsBlockRaw-0] - _ = x[literalsBlockRLE-1] - _ = x[literalsBlockCompressed-2] - _ = x[literalsBlockTreeless-3] -} - -const _literalsBlockType_name = "literalsBlockRawliteralsBlockRLEliteralsBlockCompressedliteralsBlockTreeless" - -var _literalsBlockType_index = [...]uint8{0, 16, 32, 55, 76} - -func (i literalsBlockType) String() string { - if i >= literalsBlockType(len(_literalsBlockType_index)-1) { - return "literalsBlockType(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _literalsBlockType_name[_literalsBlockType_index[i]:_literalsBlockType_index[i+1]] -} -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[compModePredefined-0] - _ = x[compModeRLE-1] - _ = x[compModeFSE-2] - _ = x[compModeRepeat-3] -} - -const _seqCompMode_name = "compModePredefinedcompModeRLEcompModeFSEcompModeRepeat" - -var _seqCompMode_index = [...]uint8{0, 18, 29, 40, 54} - -func (i seqCompMode) String() string { - if i >= seqCompMode(len(_seqCompMode_index)-1) { - return "seqCompMode(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _seqCompMode_name[_seqCompMode_index[i]:_seqCompMode_index[i+1]] -} -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[tableLiteralLengths-0] - _ = x[tableOffsets-1] - _ = x[tableMatchLengths-2] -} - -const _tableIndex_name = "tableLiteralLengthstableOffsetstableMatchLengths" - -var _tableIndex_index = [...]uint8{0, 19, 31, 48} - -func (i tableIndex) String() string { - if i >= tableIndex(len(_tableIndex_index)-1) { - return "tableIndex(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _tableIndex_name[_tableIndex_index[i]:_tableIndex_index[i+1]] -} diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go deleted file mode 100644 index 55a388553..000000000 --- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "fmt" - "io" -) - -type byteBuffer interface { - // Read up to 8 bytes. - // Returns io.ErrUnexpectedEOF if this cannot be satisfied. - readSmall(n int) ([]byte, error) - - // Read >8 bytes. - // MAY use the destination slice. - readBig(n int, dst []byte) ([]byte, error) - - // Read a single byte. - readByte() (byte, error) - - // Skip n bytes. - skipN(n int64) error -} - -// in-memory buffer -type byteBuf []byte - -func (b *byteBuf) readSmall(n int) ([]byte, error) { - if debugAsserts && n > 8 { - panic(fmt.Errorf("small read > 8 (%d). use readBig", n)) - } - bb := *b - if len(bb) < n { - return nil, io.ErrUnexpectedEOF - } - r := bb[:n] - *b = bb[n:] - return r, nil -} - -func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) { - bb := *b - if len(bb) < n { - return nil, io.ErrUnexpectedEOF - } - r := bb[:n] - *b = bb[n:] - return r, nil -} - -func (b *byteBuf) readByte() (byte, error) { - bb := *b - if len(bb) < 1 { - return 0, io.ErrUnexpectedEOF - } - r := bb[0] - *b = bb[1:] - return r, nil -} - -func (b *byteBuf) skipN(n int64) error { - bb := *b - if n < 0 { - return fmt.Errorf("negative skip (%d) requested", n) - } - if int64(len(bb)) < n { - return io.ErrUnexpectedEOF - } - *b = bb[n:] - return nil -} - -// wrapper around a reader. -type readerWrapper struct { - r io.Reader - tmp [8]byte -} - -func (r *readerWrapper) readSmall(n int) ([]byte, error) { - if debugAsserts && n > 8 { - panic(fmt.Errorf("small read > 8 (%d). use readBig", n)) - } - n2, err := io.ReadFull(r.r, r.tmp[:n]) - // We only really care about the actual bytes read. - if err != nil { - if err == io.EOF { - return nil, io.ErrUnexpectedEOF - } - if debugDecoder { - println("readSmall: got", n2, "want", n, "err", err) - } - return nil, err - } - return r.tmp[:n], nil -} - -func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) { - if cap(dst) < n { - dst = make([]byte, n) - } - n2, err := io.ReadFull(r.r, dst[:n]) - if err == io.EOF && n > 0 { - err = io.ErrUnexpectedEOF - } - return dst[:n2], err -} - -func (r *readerWrapper) readByte() (byte, error) { - n2, err := io.ReadFull(r.r, r.tmp[:1]) - if err != nil { - if err == io.EOF { - err = io.ErrUnexpectedEOF - } - return 0, err - } - if n2 != 1 { - return 0, io.ErrUnexpectedEOF - } - return r.tmp[0], nil -} - -func (r *readerWrapper) skipN(n int64) error { - n2, err := io.CopyN(io.Discard, r.r, n) - if n2 != n { - err = io.ErrUnexpectedEOF - } - return err -} diff --git a/vendor/github.com/klauspost/compress/zstd/bytereader.go b/vendor/github.com/klauspost/compress/zstd/bytereader.go deleted file mode 100644 index 0e59a242d..000000000 --- a/vendor/github.com/klauspost/compress/zstd/bytereader.go +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -// byteReader provides a byte reader that reads -// little endian values from a byte stream. -// The input stream is manually advanced. -// The reader performs no bounds checks. -type byteReader struct { - b []byte - off int -} - -// advance the stream b n bytes. -func (b *byteReader) advance(n uint) { - b.off += int(n) -} - -// overread returns whether we have advanced too far. -func (b *byteReader) overread() bool { - return b.off > len(b.b) -} - -// Int32 returns a little endian int32 starting at current offset. -func (b byteReader) Int32() int32 { - b2 := b.b[b.off:] - b2 = b2[:4] - v3 := int32(b2[3]) - v2 := int32(b2[2]) - v1 := int32(b2[1]) - v0 := int32(b2[0]) - return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) -} - -// Uint8 returns the next byte -func (b *byteReader) Uint8() uint8 { - v := b.b[b.off] - return v -} - -// Uint32 returns a little endian uint32 starting at current offset. -func (b byteReader) Uint32() uint32 { - if r := b.remain(); r < 4 { - // Very rare - v := uint32(0) - for i := 1; i <= r; i++ { - v = (v << 8) | uint32(b.b[len(b.b)-i]) - } - return v - } - b2 := b.b[b.off:] - b2 = b2[:4] - v3 := uint32(b2[3]) - v2 := uint32(b2[2]) - v1 := uint32(b2[1]) - v0 := uint32(b2[0]) - return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) -} - -// Uint32NC returns a little endian uint32 starting at current offset. -// The caller must be sure if there are at least 4 bytes left. -func (b byteReader) Uint32NC() uint32 { - b2 := b.b[b.off:] - b2 = b2[:4] - v3 := uint32(b2[3]) - v2 := uint32(b2[2]) - v1 := uint32(b2[1]) - v0 := uint32(b2[0]) - return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) -} - -// unread returns the unread portion of the input. -func (b byteReader) unread() []byte { - return b.b[b.off:] -} - -// remain will return the number of bytes remaining. -func (b byteReader) remain() int { - return len(b.b) - b.off -} diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go deleted file mode 100644 index 6a5a2988b..000000000 --- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright 2020+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. - -package zstd - -import ( - "encoding/binary" - "errors" - "io" -) - -// HeaderMaxSize is the maximum size of a Frame and Block Header. -// If less is sent to Header.Decode it *may* still contain enough information. -const HeaderMaxSize = 14 + 3 - -// Header contains information about the first frame and block within that. -type Header struct { - // SingleSegment specifies whether the data is to be decompressed into a - // single contiguous memory segment. - // It implies that WindowSize is invalid and that FrameContentSize is valid. - SingleSegment bool - - // WindowSize is the window of data to keep while decoding. - // Will only be set if SingleSegment is false. - WindowSize uint64 - - // Dictionary ID. - // If 0, no dictionary. - DictionaryID uint32 - - // HasFCS specifies whether FrameContentSize has a valid value. - HasFCS bool - - // FrameContentSize is the expected uncompressed size of the entire frame. - FrameContentSize uint64 - - // Skippable will be true if the frame is meant to be skipped. - // This implies that FirstBlock.OK is false. - Skippable bool - - // SkippableID is the user-specific ID for the skippable frame. - // Valid values are between 0 to 15, inclusive. - SkippableID int - - // SkippableSize is the length of the user data to skip following - // the header. - SkippableSize uint32 - - // HeaderSize is the raw size of the frame header. - // - // For normal frames, it includes the size of the magic number and - // the size of the header (per section 3.1.1.1). - // It does not include the size for any data blocks (section 3.1.1.2) nor - // the size for the trailing content checksum. - // - // For skippable frames, this counts the size of the magic number - // along with the size of the size field of the payload. - // It does not include the size of the skippable payload itself. - // The total frame size is the HeaderSize plus the SkippableSize. - HeaderSize int - - // First block information. - FirstBlock struct { - // OK will be set if first block could be decoded. - OK bool - - // Is this the last block of a frame? - Last bool - - // Is the data compressed? - // If true CompressedSize will be populated. - // Unfortunately DecompressedSize cannot be determined - // without decoding the blocks. - Compressed bool - - // DecompressedSize is the expected decompressed size of the block. - // Will be 0 if it cannot be determined. - DecompressedSize int - - // CompressedSize of the data in the block. - // Does not include the block header. - // Will be equal to DecompressedSize if not Compressed. - CompressedSize int - } - - // If set there is a checksum present for the block content. - // The checksum field at the end is always 4 bytes long. - HasCheckSum bool -} - -// Decode the header from the beginning of the stream. -// This will decode the frame header and the first block header if enough bytes are provided. -// It is recommended to provide at least HeaderMaxSize bytes. -// If the frame header cannot be read an error will be returned. -// If there isn't enough input, io.ErrUnexpectedEOF is returned. -// The FirstBlock.OK will indicate if enough information was available to decode the first block header. -func (h *Header) Decode(in []byte) error { - _, err := h.DecodeAndStrip(in) - return err -} - -// DecodeAndStrip will decode the header from the beginning of the stream -// and on success return the remaining bytes. -// This will decode the frame header and the first block header if enough bytes are provided. -// It is recommended to provide at least HeaderMaxSize bytes. -// If the frame header cannot be read an error will be returned. -// If there isn't enough input, io.ErrUnexpectedEOF is returned. -// The FirstBlock.OK will indicate if enough information was available to decode the first block header. -func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) { - *h = Header{} - if len(in) < 4 { - return nil, io.ErrUnexpectedEOF - } - h.HeaderSize += 4 - b, in := in[:4], in[4:] - if string(b) != frameMagic { - if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 { - return nil, ErrMagicMismatch - } - if len(in) < 4 { - return nil, io.ErrUnexpectedEOF - } - h.HeaderSize += 4 - h.Skippable = true - h.SkippableID = int(b[0] & 0xf) - h.SkippableSize = binary.LittleEndian.Uint32(in) - return in[4:], nil - } - - // Read Window_Descriptor - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor - if len(in) < 1 { - return nil, io.ErrUnexpectedEOF - } - fhd, in := in[0], in[1:] - h.HeaderSize++ - h.SingleSegment = fhd&(1<<5) != 0 - h.HasCheckSum = fhd&(1<<2) != 0 - if fhd&(1<<3) != 0 { - return nil, errors.New("reserved bit set on frame header") - } - - if !h.SingleSegment { - if len(in) < 1 { - return nil, io.ErrUnexpectedEOF - } - var wd byte - wd, in = in[0], in[1:] - h.HeaderSize++ - windowLog := 10 + (wd >> 3) - windowBase := uint64(1) << windowLog - windowAdd := (windowBase / 8) * uint64(wd&0x7) - h.WindowSize = windowBase + windowAdd - } - - // Read Dictionary_ID - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id - if size := fhd & 3; size != 0 { - if size == 3 { - size = 4 - } - if len(in) < int(size) { - return nil, io.ErrUnexpectedEOF - } - b, in = in[:size], in[size:] - h.HeaderSize += int(size) - switch len(b) { - case 1: - h.DictionaryID = uint32(b[0]) - case 2: - h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) - case 4: - h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) - } - } - - // Read Frame_Content_Size - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size - var fcsSize int - v := fhd >> 6 - switch v { - case 0: - if h.SingleSegment { - fcsSize = 1 - } - default: - fcsSize = 1 << v - } - - if fcsSize > 0 { - h.HasFCS = true - if len(in) < fcsSize { - return nil, io.ErrUnexpectedEOF - } - b, in = in[:fcsSize], in[fcsSize:] - h.HeaderSize += int(fcsSize) - switch len(b) { - case 1: - h.FrameContentSize = uint64(b[0]) - case 2: - // When FCS_Field_Size is 2, the offset of 256 is added. - h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256 - case 4: - h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24) - case 8: - d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) - d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24) - h.FrameContentSize = uint64(d1) | (uint64(d2) << 32) - } - } - - // Frame Header done, we will not fail from now on. - if len(in) < 3 { - return in, nil - } - tmp := in[:3] - bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) - h.FirstBlock.Last = bh&1 != 0 - blockType := blockType((bh >> 1) & 3) - // find size. - cSize := int(bh >> 3) - switch blockType { - case blockTypeReserved: - return in, nil - case blockTypeRLE: - h.FirstBlock.Compressed = true - h.FirstBlock.DecompressedSize = cSize - h.FirstBlock.CompressedSize = 1 - case blockTypeCompressed: - h.FirstBlock.Compressed = true - h.FirstBlock.CompressedSize = cSize - case blockTypeRaw: - h.FirstBlock.DecompressedSize = cSize - h.FirstBlock.CompressedSize = cSize - default: - panic("Invalid block type") - } - - h.FirstBlock.OK = true - return in, nil -} - -// AppendTo will append the encoded header to the dst slice. -// There is no error checking performed on the header values. -func (h *Header) AppendTo(dst []byte) ([]byte, error) { - if h.Skippable { - magic := [4]byte{0x50, 0x2a, 0x4d, 0x18} - magic[0] |= byte(h.SkippableID & 0xf) - dst = append(dst, magic[:]...) - f := h.SkippableSize - return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil - } - f := frameHeader{ - ContentSize: h.FrameContentSize, - WindowSize: uint32(h.WindowSize), - SingleSegment: h.SingleSegment, - Checksum: h.HasCheckSum, - DictID: h.DictionaryID, - } - return f.appendTo(dst), nil -} diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go deleted file mode 100644 index ea2a19376..000000000 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ /dev/null @@ -1,949 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "context" - "encoding/binary" - "io" - "sync" - - "github.com/klauspost/compress/zstd/internal/xxhash" -) - -// Decoder provides decoding of zstandard streams. -// The decoder has been designed to operate without allocations after a warmup. -// This means that you should store the decoder for best performance. -// To re-use a stream decoder, use the Reset(r io.Reader) error to switch to another stream. -// A decoder can safely be re-used even if the previous stream failed. -// To release the resources, you must call the Close() function on a decoder. -type Decoder struct { - o decoderOptions - - // Unreferenced decoders, ready for use. - decoders chan *blockDec - - // Current read position used for Reader functionality. - current decoderState - - // sync stream decoding - syncStream struct { - decodedFrame uint64 - br readerWrapper - enabled bool - inFrame bool - dstBuf []byte - } - - frame *frameDec - - // Custom dictionaries. - dicts map[uint32]*dict - - // streamWg is the waitgroup for all streams - streamWg sync.WaitGroup -} - -// decoderState is used for maintaining state when the decoder -// is used for streaming. -type decoderState struct { - // current block being written to stream. - decodeOutput - - // output in order to be written to stream. - output chan decodeOutput - - // cancel remaining output. - cancel context.CancelFunc - - // crc of current frame - crc *xxhash.Digest - - flushed bool -} - -var ( - // Check the interfaces we want to support. - _ = io.WriterTo(&Decoder{}) - _ = io.Reader(&Decoder{}) -) - -// NewReader creates a new decoder. -// A nil Reader can be provided in which case Reset can be used to start a decode. -// -// A Decoder can be used in two modes: -// -// 1) As a stream, or -// 2) For stateless decoding using DecodeAll. -// -// Only a single stream can be decoded concurrently, but the same decoder -// can run multiple concurrent stateless decodes. It is even possible to -// use stateless decodes while a stream is being decoded. -// -// The Reset function can be used to initiate a new stream, which will considerably -// reduce the allocations normally caused by NewReader. -func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { - initPredefined() - var d Decoder - d.o.setDefault() - for _, o := range opts { - err := o(&d.o) - if err != nil { - return nil, err - } - } - d.current.crc = xxhash.New() - d.current.flushed = true - - if r == nil { - d.current.err = ErrDecoderNilInput - } - - // Transfer option dicts. - d.dicts = make(map[uint32]*dict, len(d.o.dicts)) - for _, dc := range d.o.dicts { - d.dicts[dc.id] = dc - } - d.o.dicts = nil - - // Create decoders - d.decoders = make(chan *blockDec, d.o.concurrent) - for i := 0; i < d.o.concurrent; i++ { - dec := newBlockDec(d.o.lowMem) - dec.localFrame = newFrameDec(d.o) - d.decoders <- dec - } - - if r == nil { - return &d, nil - } - return &d, d.Reset(r) -} - -// Read bytes from the decompressed stream into p. -// Returns the number of bytes read and any error that occurred. -// When the stream is done, io.EOF will be returned. -func (d *Decoder) Read(p []byte) (int, error) { - var n int - for { - if len(d.current.b) > 0 { - filled := copy(p, d.current.b) - p = p[filled:] - d.current.b = d.current.b[filled:] - n += filled - } - if len(p) == 0 { - break - } - if len(d.current.b) == 0 { - // We have an error and no more data - if d.current.err != nil { - break - } - if !d.nextBlock(n == 0) { - return n, d.current.err - } - } - } - if len(d.current.b) > 0 { - if debugDecoder { - println("returning", n, "still bytes left:", len(d.current.b)) - } - // Only return error at end of block - return n, nil - } - if d.current.err != nil { - d.drainOutput() - } - if debugDecoder { - println("returning", n, d.current.err, len(d.decoders)) - } - return n, d.current.err -} - -// Reset will reset the decoder the supplied stream after the current has finished processing. -// Note that this functionality cannot be used after Close has been called. -// Reset can be called with a nil reader to release references to the previous reader. -// After being called with a nil reader, no other operations than Reset or DecodeAll or Close -// should be used. -func (d *Decoder) Reset(r io.Reader) error { - if d.current.err == ErrDecoderClosed { - return d.current.err - } - - d.drainOutput() - - d.syncStream.br.r = nil - if r == nil { - d.current.err = ErrDecoderNilInput - if len(d.current.b) > 0 { - d.current.b = d.current.b[:0] - } - d.current.flushed = true - return nil - } - - // If bytes buffer and < 5MB, do sync decoding anyway. - if bb, ok := r.(byter); ok && bb.Len() < d.o.decodeBufsBelow && !d.o.limitToCap { - bb2 := bb - if debugDecoder { - println("*bytes.Buffer detected, doing sync decode, len:", bb.Len()) - } - b := bb2.Bytes() - var dst []byte - if cap(d.syncStream.dstBuf) > 0 { - dst = d.syncStream.dstBuf[:0] - } - - dst, err := d.DecodeAll(b, dst) - if err == nil { - err = io.EOF - } - // Save output buffer - d.syncStream.dstBuf = dst - d.current.b = dst - d.current.err = err - d.current.flushed = true - if debugDecoder { - println("sync decode to", len(dst), "bytes, err:", err) - } - return nil - } - // Remove current block. - d.stashDecoder() - d.current.decodeOutput = decodeOutput{} - d.current.err = nil - d.current.flushed = false - d.current.d = nil - d.syncStream.dstBuf = nil - - // Ensure no-one else is still running... - d.streamWg.Wait() - if d.frame == nil { - d.frame = newFrameDec(d.o) - } - - if d.o.concurrent == 1 { - return d.startSyncDecoder(r) - } - - d.current.output = make(chan decodeOutput, d.o.concurrent) - ctx, cancel := context.WithCancel(context.Background()) - d.current.cancel = cancel - d.streamWg.Add(1) - go d.startStreamDecoder(ctx, r, d.current.output) - - return nil -} - -// drainOutput will drain the output until errEndOfStream is sent. -func (d *Decoder) drainOutput() { - if d.current.cancel != nil { - if debugDecoder { - println("cancelling current") - } - d.current.cancel() - d.current.cancel = nil - } - if d.current.d != nil { - if debugDecoder { - printf("re-adding current decoder %p, decoders: %d", d.current.d, len(d.decoders)) - } - d.decoders <- d.current.d - d.current.d = nil - d.current.b = nil - } - if d.current.output == nil || d.current.flushed { - println("current already flushed") - return - } - for v := range d.current.output { - if v.d != nil { - if debugDecoder { - printf("re-adding decoder %p", v.d) - } - d.decoders <- v.d - } - } - d.current.output = nil - d.current.flushed = true -} - -// WriteTo writes data to w until there's no more data to write or when an error occurs. -// The return value n is the number of bytes written. -// Any error encountered during the write is also returned. -func (d *Decoder) WriteTo(w io.Writer) (int64, error) { - var n int64 - for { - if len(d.current.b) > 0 { - n2, err2 := w.Write(d.current.b) - n += int64(n2) - if err2 != nil && (d.current.err == nil || d.current.err == io.EOF) { - d.current.err = err2 - } else if n2 != len(d.current.b) { - d.current.err = io.ErrShortWrite - } - } - if d.current.err != nil { - break - } - d.nextBlock(true) - } - err := d.current.err - if err != nil { - d.drainOutput() - } - if err == io.EOF { - err = nil - } - return n, err -} - -// DecodeAll allows stateless decoding of a blob of bytes. -// Output will be appended to dst, so if the destination size is known -// you can pre-allocate the destination slice to avoid allocations. -// DecodeAll can be used concurrently. -// The Decoder concurrency limits will be respected. -func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { - if d.decoders == nil { - return dst, ErrDecoderClosed - } - - // Grab a block decoder and frame decoder. - block := <-d.decoders - frame := block.localFrame - initialSize := len(dst) - defer func() { - if debugDecoder { - printf("re-adding decoder: %p", block) - } - frame.rawInput = nil - frame.bBuf = nil - if frame.history.decoders.br != nil { - frame.history.decoders.br.in = nil - frame.history.decoders.br.cursor = 0 - } - d.decoders <- block - }() - frame.bBuf = input - - for { - frame.history.reset() - err := frame.reset(&frame.bBuf) - if err != nil { - if err == io.EOF { - if debugDecoder { - println("frame reset return EOF") - } - return dst, nil - } - return dst, err - } - if err = d.setDict(frame); err != nil { - return nil, err - } - if frame.WindowSize > d.o.maxWindowSize { - if debugDecoder { - println("window size exceeded:", frame.WindowSize, ">", d.o.maxWindowSize) - } - return dst, ErrWindowSizeExceeded - } - if frame.FrameContentSize != fcsUnknown { - if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)-initialSize) { - if debugDecoder { - println("decoder size exceeded; fcs:", frame.FrameContentSize, "> mcs:", d.o.maxDecodedSize-uint64(len(dst)-initialSize), "len:", len(dst)) - } - return dst, ErrDecoderSizeExceeded - } - if d.o.limitToCap && frame.FrameContentSize > uint64(cap(dst)-len(dst)) { - if debugDecoder { - println("decoder size exceeded; fcs:", frame.FrameContentSize, "> (cap-len)", cap(dst)-len(dst)) - } - return dst, ErrDecoderSizeExceeded - } - if cap(dst)-len(dst) < int(frame.FrameContentSize) { - dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)+compressedBlockOverAlloc) - copy(dst2, dst) - dst = dst2 - } - } - - if cap(dst) == 0 && !d.o.limitToCap { - // Allocate len(input) * 2 by default if nothing is provided - // and we didn't get frame content size. - size := len(input) * 2 - // Cap to 1 MB. - if size > 1<<20 { - size = 1 << 20 - } - if uint64(size) > d.o.maxDecodedSize { - size = int(d.o.maxDecodedSize) - } - dst = make([]byte, 0, size) - } - - dst, err = frame.runDecoder(dst, block) - if err != nil { - return dst, err - } - if uint64(len(dst)-initialSize) > d.o.maxDecodedSize { - return dst, ErrDecoderSizeExceeded - } - if len(frame.bBuf) == 0 { - if debugDecoder { - println("frame dbuf empty") - } - break - } - } - return dst, nil -} - -// nextBlock returns the next block. -// If an error occurs d.err will be set. -// Optionally the function can block for new output. -// If non-blocking mode is used the returned boolean will be false -// if no data was available without blocking. -func (d *Decoder) nextBlock(blocking bool) (ok bool) { - if d.current.err != nil { - // Keep error state. - return false - } - d.current.b = d.current.b[:0] - - // SYNC: - if d.syncStream.enabled { - if !blocking { - return false - } - ok = d.nextBlockSync() - if !ok { - d.stashDecoder() - } - return ok - } - - //ASYNC: - d.stashDecoder() - if blocking { - d.current.decodeOutput, ok = <-d.current.output - } else { - select { - case d.current.decodeOutput, ok = <-d.current.output: - default: - return false - } - } - if !ok { - // This should not happen, so signal error state... - d.current.err = io.ErrUnexpectedEOF - return false - } - next := d.current.decodeOutput - if next.d != nil && next.d.async.newHist != nil { - d.current.crc.Reset() - } - if debugDecoder { - var tmp [4]byte - binary.LittleEndian.PutUint32(tmp[:], uint32(xxhash.Sum64(next.b))) - println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp) - } - - if d.o.ignoreChecksum { - return true - } - - if len(next.b) > 0 { - d.current.crc.Write(next.b) - } - if next.err == nil && next.d != nil && next.d.hasCRC { - got := uint32(d.current.crc.Sum64()) - if got != next.d.checkCRC { - if debugDecoder { - printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC) - } - d.current.err = ErrCRCMismatch - } else { - if debugDecoder { - printf("CRC ok %08x\n", got) - } - } - } - - return true -} - -func (d *Decoder) nextBlockSync() (ok bool) { - if d.current.d == nil { - d.current.d = <-d.decoders - } - for len(d.current.b) == 0 { - if !d.syncStream.inFrame { - d.frame.history.reset() - d.current.err = d.frame.reset(&d.syncStream.br) - if d.current.err == nil { - d.current.err = d.setDict(d.frame) - } - if d.current.err != nil { - return false - } - if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize { - d.current.err = ErrDecoderSizeExceeded - return false - } - - d.syncStream.decodedFrame = 0 - d.syncStream.inFrame = true - } - d.current.err = d.frame.next(d.current.d) - if d.current.err != nil { - return false - } - d.frame.history.ensureBlock() - if debugDecoder { - println("History trimmed:", len(d.frame.history.b), "decoded already:", d.syncStream.decodedFrame) - } - histBefore := len(d.frame.history.b) - d.current.err = d.current.d.decodeBuf(&d.frame.history) - - if d.current.err != nil { - println("error after:", d.current.err) - return false - } - d.current.b = d.frame.history.b[histBefore:] - if debugDecoder { - println("history after:", len(d.frame.history.b)) - } - - // Check frame size (before CRC) - d.syncStream.decodedFrame += uint64(len(d.current.b)) - if d.syncStream.decodedFrame > d.frame.FrameContentSize { - if debugDecoder { - printf("DecodedFrame (%d) > FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize) - } - d.current.err = ErrFrameSizeExceeded - return false - } - - // Check FCS - if d.current.d.Last && d.frame.FrameContentSize != fcsUnknown && d.syncStream.decodedFrame != d.frame.FrameContentSize { - if debugDecoder { - printf("DecodedFrame (%d) != FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize) - } - d.current.err = ErrFrameSizeMismatch - return false - } - - // Update/Check CRC - if d.frame.HasCheckSum { - if !d.o.ignoreChecksum { - d.frame.crc.Write(d.current.b) - } - if d.current.d.Last { - if !d.o.ignoreChecksum { - d.current.err = d.frame.checkCRC() - } else { - d.current.err = d.frame.consumeCRC() - } - if d.current.err != nil { - println("CRC error:", d.current.err) - return false - } - } - } - d.syncStream.inFrame = !d.current.d.Last - } - return true -} - -func (d *Decoder) stashDecoder() { - if d.current.d != nil { - if debugDecoder { - printf("re-adding current decoder %p", d.current.d) - } - d.decoders <- d.current.d - d.current.d = nil - } -} - -// Close will release all resources. -// It is NOT possible to reuse the decoder after this. -func (d *Decoder) Close() { - if d.current.err == ErrDecoderClosed { - return - } - d.drainOutput() - if d.current.cancel != nil { - d.current.cancel() - d.streamWg.Wait() - d.current.cancel = nil - } - if d.decoders != nil { - close(d.decoders) - for dec := range d.decoders { - dec.Close() - } - d.decoders = nil - } - if d.current.d != nil { - d.current.d.Close() - d.current.d = nil - } - d.current.err = ErrDecoderClosed -} - -// IOReadCloser returns the decoder as an io.ReadCloser for convenience. -// Any changes to the decoder will be reflected, so the returned ReadCloser -// can be reused along with the decoder. -// io.WriterTo is also supported by the returned ReadCloser. -func (d *Decoder) IOReadCloser() io.ReadCloser { - return closeWrapper{d: d} -} - -// closeWrapper wraps a function call as a closer. -type closeWrapper struct { - d *Decoder -} - -// WriteTo forwards WriteTo calls to the decoder. -func (c closeWrapper) WriteTo(w io.Writer) (n int64, err error) { - return c.d.WriteTo(w) -} - -// Read forwards read calls to the decoder. -func (c closeWrapper) Read(p []byte) (n int, err error) { - return c.d.Read(p) -} - -// Close closes the decoder. -func (c closeWrapper) Close() error { - c.d.Close() - return nil -} - -type decodeOutput struct { - d *blockDec - b []byte - err error -} - -func (d *Decoder) startSyncDecoder(r io.Reader) error { - d.frame.history.reset() - d.syncStream.br = readerWrapper{r: r} - d.syncStream.inFrame = false - d.syncStream.enabled = true - d.syncStream.decodedFrame = 0 - return nil -} - -// Create Decoder: -// ASYNC: -// Spawn 3 go routines. -// 0: Read frames and decode block literals. -// 1: Decode sequences. -// 2: Execute sequences, send to output. -func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output chan decodeOutput) { - defer d.streamWg.Done() - br := readerWrapper{r: r} - - var seqDecode = make(chan *blockDec, d.o.concurrent) - var seqExecute = make(chan *blockDec, d.o.concurrent) - - // Async 1: Decode sequences... - go func() { - var hist history - var hasErr bool - - for block := range seqDecode { - if hasErr { - if block != nil { - seqExecute <- block - } - continue - } - if block.async.newHist != nil { - if debugDecoder { - println("Async 1: new history, recent:", block.async.newHist.recentOffsets) - } - hist.reset() - hist.decoders = block.async.newHist.decoders - hist.recentOffsets = block.async.newHist.recentOffsets - hist.windowSize = block.async.newHist.windowSize - if block.async.newHist.dict != nil { - hist.setDict(block.async.newHist.dict) - } - } - if block.err != nil || block.Type != blockTypeCompressed { - hasErr = block.err != nil - seqExecute <- block - continue - } - - hist.decoders.literals = block.async.literals - block.err = block.prepareSequences(block.async.seqData, &hist) - if debugDecoder && block.err != nil { - println("prepareSequences returned:", block.err) - } - hasErr = block.err != nil - if block.err == nil { - block.err = block.decodeSequences(&hist) - if debugDecoder && block.err != nil { - println("decodeSequences returned:", block.err) - } - hasErr = block.err != nil - // block.async.sequence = hist.decoders.seq[:hist.decoders.nSeqs] - block.async.seqSize = hist.decoders.seqSize - } - seqExecute <- block - } - close(seqExecute) - hist.reset() - }() - - var wg sync.WaitGroup - wg.Add(1) - - // Async 3: Execute sequences... - frameHistCache := d.frame.history.b - go func() { - var hist history - var decodedFrame uint64 - var fcs uint64 - var hasErr bool - for block := range seqExecute { - out := decodeOutput{err: block.err, d: block} - if block.err != nil || hasErr { - hasErr = true - output <- out - continue - } - if block.async.newHist != nil { - if debugDecoder { - println("Async 2: new history") - } - hist.reset() - hist.windowSize = block.async.newHist.windowSize - hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer - if block.async.newHist.dict != nil { - hist.setDict(block.async.newHist.dict) - } - - if cap(hist.b) < hist.allocFrameBuffer { - if cap(frameHistCache) >= hist.allocFrameBuffer { - hist.b = frameHistCache - } else { - hist.b = make([]byte, 0, hist.allocFrameBuffer) - println("Alloc history sized", hist.allocFrameBuffer) - } - } - hist.b = hist.b[:0] - fcs = block.async.fcs - decodedFrame = 0 - } - do := decodeOutput{err: block.err, d: block} - switch block.Type { - case blockTypeRLE: - if debugDecoder { - println("add rle block length:", block.RLESize) - } - - if cap(block.dst) < int(block.RLESize) { - if block.lowMem { - block.dst = make([]byte, block.RLESize) - } else { - block.dst = make([]byte, maxCompressedBlockSize) - } - } - block.dst = block.dst[:block.RLESize] - v := block.data[0] - for i := range block.dst { - block.dst[i] = v - } - hist.append(block.dst) - do.b = block.dst - case blockTypeRaw: - if debugDecoder { - println("add raw block length:", len(block.data)) - } - hist.append(block.data) - do.b = block.data - case blockTypeCompressed: - if debugDecoder { - println("execute with history length:", len(hist.b), "window:", hist.windowSize) - } - hist.decoders.seqSize = block.async.seqSize - hist.decoders.literals = block.async.literals - do.err = block.executeSequences(&hist) - hasErr = do.err != nil - if debugDecoder && hasErr { - println("executeSequences returned:", do.err) - } - do.b = block.dst - } - if !hasErr { - decodedFrame += uint64(len(do.b)) - if decodedFrame > fcs { - println("fcs exceeded", block.Last, fcs, decodedFrame) - do.err = ErrFrameSizeExceeded - hasErr = true - } else if block.Last && fcs != fcsUnknown && decodedFrame != fcs { - do.err = ErrFrameSizeMismatch - hasErr = true - } else { - if debugDecoder { - println("fcs ok", block.Last, fcs, decodedFrame) - } - } - } - output <- do - } - close(output) - frameHistCache = hist.b - wg.Done() - if debugDecoder { - println("decoder goroutines finished") - } - hist.reset() - }() - - var hist history -decodeStream: - for { - var hasErr bool - hist.reset() - decodeBlock := func(block *blockDec) { - if hasErr { - if block != nil { - seqDecode <- block - } - return - } - if block.err != nil || block.Type != blockTypeCompressed { - hasErr = block.err != nil - seqDecode <- block - return - } - - remain, err := block.decodeLiterals(block.data, &hist) - block.err = err - hasErr = block.err != nil - if err == nil { - block.async.literals = hist.decoders.literals - block.async.seqData = remain - } else if debugDecoder { - println("decodeLiterals error:", err) - } - seqDecode <- block - } - frame := d.frame - if debugDecoder { - println("New frame...") - } - var historySent bool - frame.history.reset() - err := frame.reset(&br) - if debugDecoder && err != nil { - println("Frame decoder returned", err) - } - if err == nil { - err = d.setDict(frame) - } - if err == nil && d.frame.WindowSize > d.o.maxWindowSize { - if debugDecoder { - println("decoder size exceeded, fws:", d.frame.WindowSize, "> mws:", d.o.maxWindowSize) - } - - err = ErrDecoderSizeExceeded - } - if err != nil { - select { - case <-ctx.Done(): - case dec := <-d.decoders: - dec.sendErr(err) - decodeBlock(dec) - } - break decodeStream - } - - // Go through all blocks of the frame. - for { - var dec *blockDec - select { - case <-ctx.Done(): - break decodeStream - case dec = <-d.decoders: - // Once we have a decoder, we MUST return it. - } - err := frame.next(dec) - if !historySent { - h := frame.history - if debugDecoder { - println("Alloc History:", h.allocFrameBuffer) - } - hist.reset() - if h.dict != nil { - hist.setDict(h.dict) - } - dec.async.newHist = &h - dec.async.fcs = frame.FrameContentSize - historySent = true - } else { - dec.async.newHist = nil - } - if debugDecoder && err != nil { - println("next block returned error:", err) - } - dec.err = err - dec.hasCRC = false - if dec.Last && frame.HasCheckSum && err == nil { - crc, err := frame.rawInput.readSmall(4) - if len(crc) < 4 { - if err == nil { - err = io.ErrUnexpectedEOF - - } - println("CRC missing?", err) - dec.err = err - } else { - dec.checkCRC = binary.LittleEndian.Uint32(crc) - dec.hasCRC = true - if debugDecoder { - printf("found crc to check: %08x\n", dec.checkCRC) - } - } - } - err = dec.err - last := dec.Last - decodeBlock(dec) - if err != nil { - break decodeStream - } - if last { - break - } - } - } - close(seqDecode) - wg.Wait() - hist.reset() - d.frame.history.b = frameHistCache -} - -func (d *Decoder) setDict(frame *frameDec) (err error) { - dict, ok := d.dicts[frame.DictionaryID] - if ok { - if debugDecoder { - println("setting dict", frame.DictionaryID) - } - frame.history.setDict(dict) - } else if frame.DictionaryID != 0 { - // A zero or missing dictionary id is ambiguous: - // either dictionary zero, or no dictionary. In particular, - // zstd --patch-from uses this id for the source file, - // so only return an error if the dictionary id is not zero. - err = ErrUnknownDictionary - } - return err -} diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go deleted file mode 100644 index 774c5f00f..000000000 --- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "errors" - "fmt" - "math/bits" - "runtime" -) - -// DOption is an option for creating a decoder. -type DOption func(*decoderOptions) error - -// options retains accumulated state of multiple options. -type decoderOptions struct { - lowMem bool - concurrent int - maxDecodedSize uint64 - maxWindowSize uint64 - dicts []*dict - ignoreChecksum bool - limitToCap bool - decodeBufsBelow int -} - -func (o *decoderOptions) setDefault() { - *o = decoderOptions{ - // use less ram: true for now, but may change. - lowMem: true, - concurrent: runtime.GOMAXPROCS(0), - maxWindowSize: MaxWindowSize, - decodeBufsBelow: 128 << 10, - } - if o.concurrent > 4 { - o.concurrent = 4 - } - o.maxDecodedSize = 64 << 30 -} - -// WithDecoderLowmem will set whether to use a lower amount of memory, -// but possibly have to allocate more while running. -func WithDecoderLowmem(b bool) DOption { - return func(o *decoderOptions) error { o.lowMem = b; return nil } -} - -// WithDecoderConcurrency sets the number of created decoders. -// When decoding block with DecodeAll, this will limit the number -// of possible concurrently running decodes. -// When decoding streams, this will limit the number of -// inflight blocks. -// When decoding streams and setting maximum to 1, -// no async decoding will be done. -// When a value of 0 is provided GOMAXPROCS will be used. -// By default this will be set to 4 or GOMAXPROCS, whatever is lower. -func WithDecoderConcurrency(n int) DOption { - return func(o *decoderOptions) error { - if n < 0 { - return errors.New("concurrency must be at least 1") - } - if n == 0 { - o.concurrent = runtime.GOMAXPROCS(0) - } else { - o.concurrent = n - } - return nil - } -} - -// WithDecoderMaxMemory allows to set a maximum decoded size for in-memory -// non-streaming operations or maximum window size for streaming operations. -// This can be used to control memory usage of potentially hostile content. -// Maximum is 1 << 63 bytes. Default is 64GiB. -func WithDecoderMaxMemory(n uint64) DOption { - return func(o *decoderOptions) error { - if n == 0 { - return errors.New("WithDecoderMaxMemory must be at least 1") - } - if n > 1<<63 { - return errors.New("WithDecoderMaxmemory must be less than 1 << 63") - } - o.maxDecodedSize = n - return nil - } -} - -// WithDecoderDicts allows to register one or more dictionaries for the decoder. -// -// Each slice in dict must be in the [dictionary format] produced by -// "zstd --train" from the Zstandard reference implementation. -// -// If several dictionaries with the same ID are provided, the last one will be used. -// -// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format -func WithDecoderDicts(dicts ...[]byte) DOption { - return func(o *decoderOptions) error { - for _, b := range dicts { - d, err := loadDict(b) - if err != nil { - return err - } - o.dicts = append(o.dicts, d) - } - return nil - } -} - -// WithDecoderDictRaw registers a dictionary that may be used by the decoder. -// The slice content can be arbitrary data. -func WithDecoderDictRaw(id uint32, content []byte) DOption { - return func(o *decoderOptions) error { - if bits.UintSize > 32 && uint(len(content)) > dictMaxLength { - return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content)) - } - o.dicts = append(o.dicts, &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}) - return nil - } -} - -// WithDecoderMaxWindow allows to set a maximum window size for decodes. -// This allows rejecting packets that will cause big memory usage. -// The Decoder will likely allocate more memory based on the WithDecoderLowmem setting. -// If WithDecoderMaxMemory is set to a lower value, that will be used. -// Default is 512MB, Maximum is ~3.75 TB as per zstandard spec. -func WithDecoderMaxWindow(size uint64) DOption { - return func(o *decoderOptions) error { - if size < MinWindowSize { - return errors.New("WithMaxWindowSize must be at least 1KB, 1024 bytes") - } - if size > (1<<41)+7*(1<<38) { - return errors.New("WithMaxWindowSize must be less than (1<<41) + 7*(1<<38) ~ 3.75TB") - } - o.maxWindowSize = size - return nil - } -} - -// WithDecodeAllCapLimit will limit DecodeAll to decoding cap(dst)-len(dst) bytes, -// or any size set in WithDecoderMaxMemory. -// This can be used to limit decoding to a specific maximum output size. -// Disabled by default. -func WithDecodeAllCapLimit(b bool) DOption { - return func(o *decoderOptions) error { - o.limitToCap = b - return nil - } -} - -// WithDecodeBuffersBelow will fully decode readers that have a -// `Bytes() []byte` and `Len() int` interface similar to bytes.Buffer. -// This typically uses less allocations but will have the full decompressed object in memory. -// Note that DecodeAllCapLimit will disable this, as well as giving a size of 0 or less. -// Default is 128KiB. -func WithDecodeBuffersBelow(size int) DOption { - return func(o *decoderOptions) error { - o.decodeBufsBelow = size - return nil - } -} - -// IgnoreChecksum allows to forcibly ignore checksum checking. -func IgnoreChecksum(b bool) DOption { - return func(o *decoderOptions) error { - o.ignoreChecksum = b - return nil - } -} diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go deleted file mode 100644 index b7b83164b..000000000 --- a/vendor/github.com/klauspost/compress/zstd/dict.go +++ /dev/null @@ -1,565 +0,0 @@ -package zstd - -import ( - "bytes" - "encoding/binary" - "errors" - "fmt" - "io" - "math" - "sort" - - "github.com/klauspost/compress/huff0" -) - -type dict struct { - id uint32 - - litEnc *huff0.Scratch - llDec, ofDec, mlDec sequenceDec - offsets [3]int - content []byte -} - -const dictMagic = "\x37\xa4\x30\xec" - -// Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB. -const dictMaxLength = 1 << 31 - -// ID returns the dictionary id or 0 if d is nil. -func (d *dict) ID() uint32 { - if d == nil { - return 0 - } - return d.id -} - -// ContentSize returns the dictionary content size or 0 if d is nil. -func (d *dict) ContentSize() int { - if d == nil { - return 0 - } - return len(d.content) -} - -// Content returns the dictionary content. -func (d *dict) Content() []byte { - if d == nil { - return nil - } - return d.content -} - -// Offsets returns the initial offsets. -func (d *dict) Offsets() [3]int { - if d == nil { - return [3]int{} - } - return d.offsets -} - -// LitEncoder returns the literal encoder. -func (d *dict) LitEncoder() *huff0.Scratch { - if d == nil { - return nil - } - return d.litEnc -} - -// Load a dictionary as described in -// https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format -func loadDict(b []byte) (*dict, error) { - // Check static field size. - if len(b) <= 8+(3*4) { - return nil, io.ErrUnexpectedEOF - } - d := dict{ - llDec: sequenceDec{fse: &fseDecoder{}}, - ofDec: sequenceDec{fse: &fseDecoder{}}, - mlDec: sequenceDec{fse: &fseDecoder{}}, - } - if string(b[:4]) != dictMagic { - return nil, ErrMagicMismatch - } - d.id = binary.LittleEndian.Uint32(b[4:8]) - if d.id == 0 { - return nil, errors.New("dictionaries cannot have ID 0") - } - - // Read literal table - var err error - d.litEnc, b, err = huff0.ReadTable(b[8:], nil) - if err != nil { - return nil, fmt.Errorf("loading literal table: %w", err) - } - d.litEnc.Reuse = huff0.ReusePolicyMust - - br := byteReader{ - b: b, - off: 0, - } - readDec := func(i tableIndex, dec *fseDecoder) error { - if err := dec.readNCount(&br, uint16(maxTableSymbol[i])); err != nil { - return err - } - if br.overread() { - return io.ErrUnexpectedEOF - } - err = dec.transform(symbolTableX[i]) - if err != nil { - println("Transform table error:", err) - return err - } - if debugDecoder || debugEncoder { - println("Read table ok", "symbolLen:", dec.symbolLen) - } - // Set decoders as predefined so they aren't reused. - dec.preDefined = true - return nil - } - - if err := readDec(tableOffsets, d.ofDec.fse); err != nil { - return nil, err - } - if err := readDec(tableMatchLengths, d.mlDec.fse); err != nil { - return nil, err - } - if err := readDec(tableLiteralLengths, d.llDec.fse); err != nil { - return nil, err - } - if br.remain() < 12 { - return nil, io.ErrUnexpectedEOF - } - - d.offsets[0] = int(br.Uint32()) - br.advance(4) - d.offsets[1] = int(br.Uint32()) - br.advance(4) - d.offsets[2] = int(br.Uint32()) - br.advance(4) - if d.offsets[0] <= 0 || d.offsets[1] <= 0 || d.offsets[2] <= 0 { - return nil, errors.New("invalid offset in dictionary") - } - d.content = make([]byte, br.remain()) - copy(d.content, br.unread()) - if d.offsets[0] > len(d.content) || d.offsets[1] > len(d.content) || d.offsets[2] > len(d.content) { - return nil, fmt.Errorf("initial offset bigger than dictionary content size %d, offsets: %v", len(d.content), d.offsets) - } - - return &d, nil -} - -// InspectDictionary loads a zstd dictionary and provides functions to inspect the content. -func InspectDictionary(b []byte) (interface { - ID() uint32 - ContentSize() int - Content() []byte - Offsets() [3]int - LitEncoder() *huff0.Scratch -}, error) { - initPredefined() - d, err := loadDict(b) - return d, err -} - -type BuildDictOptions struct { - // Dictionary ID. - ID uint32 - - // Content to use to create dictionary tables. - Contents [][]byte - - // History to use for all blocks. - History []byte - - // Offsets to use. - Offsets [3]int - - // CompatV155 will make the dictionary compatible with Zstd v1.5.5 and earlier. - // See https://github.com/facebook/zstd/issues/3724 - CompatV155 bool - - // Use the specified encoder level. - // The dictionary will be built using the specified encoder level, - // which will reflect speed and make the dictionary tailored for that level. - // If not set SpeedBestCompression will be used. - Level EncoderLevel - - // DebugOut will write stats and other details here if set. - DebugOut io.Writer -} - -func BuildDict(o BuildDictOptions) ([]byte, error) { - initPredefined() - hist := o.History - contents := o.Contents - debug := o.DebugOut != nil - println := func(args ...interface{}) { - if o.DebugOut != nil { - fmt.Fprintln(o.DebugOut, args...) - } - } - printf := func(s string, args ...interface{}) { - if o.DebugOut != nil { - fmt.Fprintf(o.DebugOut, s, args...) - } - } - print := func(args ...interface{}) { - if o.DebugOut != nil { - fmt.Fprint(o.DebugOut, args...) - } - } - - if int64(len(hist)) > dictMaxLength { - return nil, fmt.Errorf("dictionary of size %d > %d", len(hist), int64(dictMaxLength)) - } - if len(hist) < 8 { - return nil, fmt.Errorf("dictionary of size %d < %d", len(hist), 8) - } - if len(contents) == 0 { - return nil, errors.New("no content provided") - } - d := dict{ - id: o.ID, - litEnc: nil, - llDec: sequenceDec{}, - ofDec: sequenceDec{}, - mlDec: sequenceDec{}, - offsets: o.Offsets, - content: hist, - } - block := blockEnc{lowMem: false} - block.init() - enc := encoder(&bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(maxMatchLen), bufferReset: math.MaxInt32 - int32(maxMatchLen*2), lowMem: false}}) - if o.Level != 0 { - eOpts := encoderOptions{ - level: o.Level, - blockSize: maxMatchLen, - windowSize: maxMatchLen, - dict: &d, - lowMem: false, - } - enc = eOpts.encoder() - } else { - o.Level = SpeedBestCompression - } - var ( - remain [256]int - ll [256]int - ml [256]int - of [256]int - ) - addValues := func(dst *[256]int, src []byte) { - for _, v := range src { - dst[v]++ - } - } - addHist := func(dst *[256]int, src *[256]uint32) { - for i, v := range src { - dst[i] += int(v) - } - } - seqs := 0 - nUsed := 0 - litTotal := 0 - newOffsets := make(map[uint32]int, 1000) - for _, b := range contents { - block.reset(nil) - if len(b) < 8 { - continue - } - nUsed++ - enc.Reset(&d, true) - enc.Encode(&block, b) - addValues(&remain, block.literals) - litTotal += len(block.literals) - if len(block.sequences) == 0 { - continue - } - seqs += len(block.sequences) - block.genCodes() - addHist(&ll, block.coders.llEnc.Histogram()) - addHist(&ml, block.coders.mlEnc.Histogram()) - addHist(&of, block.coders.ofEnc.Histogram()) - for i, seq := range block.sequences { - if i > 3 { - break - } - offset := seq.offset - if offset == 0 { - continue - } - if int(offset) >= len(o.History) { - continue - } - if offset > 3 { - newOffsets[offset-3]++ - } else { - newOffsets[uint32(o.Offsets[offset-1])]++ - } - } - } - // Find most used offsets. - var sortedOffsets []uint32 - for k := range newOffsets { - sortedOffsets = append(sortedOffsets, k) - } - sort.Slice(sortedOffsets, func(i, j int) bool { - a, b := sortedOffsets[i], sortedOffsets[j] - if a == b { - // Prefer the longer offset - return sortedOffsets[i] > sortedOffsets[j] - } - return newOffsets[sortedOffsets[i]] > newOffsets[sortedOffsets[j]] - }) - if len(sortedOffsets) > 3 { - if debug { - print("Offsets:") - for i, v := range sortedOffsets { - if i > 20 { - break - } - printf("[%d: %d],", v, newOffsets[v]) - } - println("") - } - - sortedOffsets = sortedOffsets[:3] - } - for i, v := range sortedOffsets { - o.Offsets[i] = int(v) - } - if debug { - println("New repeat offsets", o.Offsets) - } - - if nUsed == 0 || seqs == 0 { - return nil, fmt.Errorf("%d blocks, %d sequences found", nUsed, seqs) - } - if debug { - println("Sequences:", seqs, "Blocks:", nUsed, "Literals:", litTotal) - } - if seqs/nUsed < 512 { - // Use 512 as minimum. - nUsed = seqs / 512 - if nUsed == 0 { - nUsed = 1 - } - } - copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) { - hist := dst.Histogram() - var maxSym uint8 - var maxCount int - var fakeLength int - for i, v := range src { - if v > 0 { - v = v / nUsed - if v == 0 { - v = 1 - } - } - if v > maxCount { - maxCount = v - } - if v != 0 { - maxSym = uint8(i) - } - fakeLength += v - hist[i] = uint32(v) - } - - // Ensure we aren't trying to represent RLE. - if maxCount == fakeLength { - for i := range hist { - if uint8(i) == maxSym { - fakeLength++ - maxSym++ - hist[i+1] = 1 - if maxSym > 1 { - break - } - } - if hist[0] == 0 { - fakeLength++ - hist[i] = 1 - if maxSym > 1 { - break - } - } - } - } - - dst.HistogramFinished(maxSym, maxCount) - dst.reUsed = false - dst.useRLE = false - err := dst.normalizeCount(fakeLength) - if err != nil { - return nil, err - } - if debug { - println("RAW:", dst.count[:maxSym+1], "NORM:", dst.norm[:maxSym+1], "LEN:", fakeLength) - } - return dst.writeCount(nil) - } - if debug { - print("Literal lengths: ") - } - llTable, err := copyHist(block.coders.llEnc, &ll) - if err != nil { - return nil, err - } - if debug { - print("Match lengths: ") - } - mlTable, err := copyHist(block.coders.mlEnc, &ml) - if err != nil { - return nil, err - } - if debug { - print("Offsets: ") - } - ofTable, err := copyHist(block.coders.ofEnc, &of) - if err != nil { - return nil, err - } - - // Literal table - avgSize := litTotal - if avgSize > huff0.BlockSizeMax/2 { - avgSize = huff0.BlockSizeMax / 2 - } - huffBuff := make([]byte, 0, avgSize) - // Target size - div := litTotal / avgSize - if div < 1 { - div = 1 - } - if debug { - println("Huffman weights:") - } - for i, n := range remain[:] { - if n > 0 { - n = n / div - // Allow all entries to be represented. - if n == 0 { - n = 1 - } - huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...) - if debug { - printf("[%d: %d], ", i, n) - } - } - } - if o.CompatV155 && remain[255]/div == 0 { - huffBuff = append(huffBuff, 255) - } - scratch := &huff0.Scratch{TableLog: 11} - for tries := 0; tries < 255; tries++ { - scratch = &huff0.Scratch{TableLog: 11} - _, _, err = huff0.Compress1X(huffBuff, scratch) - if err == nil { - break - } - if debug { - printf("Try %d: Huffman error: %v\n", tries+1, err) - } - huffBuff = huffBuff[:0] - if tries == 250 { - if debug { - println("Huffman: Bailing out with predefined table") - } - - // Bail out.... Just generate something - huffBuff = append(huffBuff, bytes.Repeat([]byte{255}, 10000)...) - for i := 0; i < 128; i++ { - huffBuff = append(huffBuff, byte(i)) - } - continue - } - if errors.Is(err, huff0.ErrIncompressible) { - // Try truncating least common. - for i, n := range remain[:] { - if n > 0 { - n = n / (div * (i + 1)) - if n > 0 { - huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...) - } - } - } - if o.CompatV155 && len(huffBuff) > 0 && huffBuff[len(huffBuff)-1] != 255 { - huffBuff = append(huffBuff, 255) - } - if len(huffBuff) == 0 { - huffBuff = append(huffBuff, 0, 255) - } - } - if errors.Is(err, huff0.ErrUseRLE) { - for i, n := range remain[:] { - n = n / (div * (i + 1)) - // Allow all entries to be represented. - if n == 0 { - n = 1 - } - huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...) - } - } - } - - var out bytes.Buffer - out.Write([]byte(dictMagic)) - out.Write(binary.LittleEndian.AppendUint32(nil, o.ID)) - out.Write(scratch.OutTable) - if debug { - println("huff table:", len(scratch.OutTable), "bytes") - println("of table:", len(ofTable), "bytes") - println("ml table:", len(mlTable), "bytes") - println("ll table:", len(llTable), "bytes") - } - out.Write(ofTable) - out.Write(mlTable) - out.Write(llTable) - out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[0]))) - out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[1]))) - out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[2]))) - out.Write(hist) - if debug { - _, err := loadDict(out.Bytes()) - if err != nil { - panic(err) - } - i, err := InspectDictionary(out.Bytes()) - if err != nil { - panic(err) - } - println("ID:", i.ID()) - println("Content size:", i.ContentSize()) - println("Encoder:", i.LitEncoder() != nil) - println("Offsets:", i.Offsets()) - var totalSize int - for _, b := range contents { - totalSize += len(b) - } - - encWith := func(opts ...EOption) int { - enc, err := NewWriter(nil, opts...) - if err != nil { - panic(err) - } - defer enc.Close() - var dst []byte - var totalSize int - for _, b := range contents { - dst = enc.EncodeAll(b, dst[:0]) - totalSize += len(dst) - } - return totalSize - } - plain := encWith(WithEncoderLevel(o.Level)) - withDict := encWith(WithEncoderLevel(o.Level), WithEncoderDict(out.Bytes())) - println("Input size:", totalSize) - println("Plain Compressed:", plain) - println("Dict Compressed:", withDict) - println("Saved:", plain-withDict, (plain-withDict)/len(contents), "bytes per input (rounded down)") - } - return out.Bytes(), nil -} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go deleted file mode 100644 index 7d250c67f..000000000 --- a/vendor/github.com/klauspost/compress/zstd/enc_base.go +++ /dev/null @@ -1,173 +0,0 @@ -package zstd - -import ( - "fmt" - "math/bits" - - "github.com/klauspost/compress/zstd/internal/xxhash" -) - -const ( - dictShardBits = 6 -) - -type fastBase struct { - // cur is the offset at the start of hist - cur int32 - // maximum offset. Should be at least 2x block size. - maxMatchOff int32 - bufferReset int32 - hist []byte - crc *xxhash.Digest - tmp [8]byte - blk *blockEnc - lastDictID uint32 - lowMem bool -} - -// CRC returns the underlying CRC writer. -func (e *fastBase) CRC() *xxhash.Digest { - return e.crc -} - -// AppendCRC will append the CRC to the destination slice and return it. -func (e *fastBase) AppendCRC(dst []byte) []byte { - crc := e.crc.Sum(e.tmp[:0]) - dst = append(dst, crc[7], crc[6], crc[5], crc[4]) - return dst -} - -// WindowSize returns the window size of the encoder, -// or a window size small enough to contain the input size, if > 0. -func (e *fastBase) WindowSize(size int64) int32 { - if size > 0 && size < int64(e.maxMatchOff) { - b := int32(1) << uint(bits.Len(uint(size))) - // Keep minimum window. - if b < 1024 { - b = 1024 - } - return b - } - return e.maxMatchOff -} - -// Block returns the current block. -func (e *fastBase) Block() *blockEnc { - return e.blk -} - -func (e *fastBase) addBlock(src []byte) int32 { - if debugAsserts && e.cur > e.bufferReset { - panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset)) - } - // check if we have space already - if len(e.hist)+len(src) > cap(e.hist) { - if cap(e.hist) == 0 { - e.ensureHist(len(src)) - } else { - if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) { - panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff)) - } - // Move down - offset := int32(len(e.hist)) - e.maxMatchOff - copy(e.hist[0:e.maxMatchOff], e.hist[offset:]) - e.cur += offset - e.hist = e.hist[:e.maxMatchOff] - } - } - s := int32(len(e.hist)) - e.hist = append(e.hist, src...) - return s -} - -// ensureHist will ensure that history can keep at least this many bytes. -func (e *fastBase) ensureHist(n int) { - if cap(e.hist) >= n { - return - } - l := e.maxMatchOff - if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize { - l += maxCompressedBlockSize - } else { - l += e.maxMatchOff - } - // Make it at least 1MB. - if l < 1<<20 && !e.lowMem { - l = 1 << 20 - } - // Make it at least the requested size. - if l < int32(n) { - l = int32(n) - } - e.hist = make([]byte, 0, l) -} - -// useBlock will replace the block with the provided one, -// but transfer recent offsets from the previous. -func (e *fastBase) UseBlock(enc *blockEnc) { - enc.reset(e.blk) - e.blk = enc -} - -func (e *fastBase) matchlen(s, t int32, src []byte) int32 { - if debugAsserts { - if s < 0 { - err := fmt.Sprintf("s (%d) < 0", s) - panic(err) - } - if t < 0 { - err := fmt.Sprintf("t (%d) < 0", t) - panic(err) - } - if s-t > e.maxMatchOff { - err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff) - panic(err) - } - if len(src)-int(s) > maxCompressedBlockSize { - panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) - } - } - return int32(matchLen(src[s:], src[t:])) -} - -// Reset the encoding table. -func (e *fastBase) resetBase(d *dict, singleBlock bool) { - if e.blk == nil { - e.blk = &blockEnc{lowMem: e.lowMem} - e.blk.init() - } else { - e.blk.reset(nil) - } - e.blk.initNewEncode() - if e.crc == nil { - e.crc = xxhash.New() - } else { - e.crc.Reset() - } - e.blk.dictLitEnc = nil - if d != nil { - low := e.lowMem - if singleBlock { - e.lowMem = true - } - e.ensureHist(d.ContentSize() + maxCompressedBlockSize) - e.lowMem = low - } - - // We offset current position so everything will be out of reach. - // If above reset line, history will be purged. - if e.cur < e.bufferReset { - e.cur += e.maxMatchOff + int32(len(e.hist)) - } - e.hist = e.hist[:0] - if d != nil { - // Set offsets (currently not used) - for i, off := range d.offsets { - e.blk.recentOffsets[i] = uint32(off) - e.blk.prevRecentOffsets[i] = e.blk.recentOffsets[i] - } - // Transfer litenc. - e.blk.dictLitEnc = d.litEnc - e.hist = append(e.hist, d.content...) - } -} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go deleted file mode 100644 index 4613724e9..000000000 --- a/vendor/github.com/klauspost/compress/zstd/enc_best.go +++ /dev/null @@ -1,560 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "bytes" - "fmt" - - "github.com/klauspost/compress" -) - -const ( - bestLongTableBits = 22 // Bits used in the long match table - bestLongTableSize = 1 << bestLongTableBits // Size of the table - bestLongLen = 8 // Bytes used for table hash - - // Note: Increasing the short table bits or making the hash shorter - // can actually lead to compression degradation since it will 'steal' more from the - // long match table and match offsets are quite big. - // This greatly depends on the type of input. - bestShortTableBits = 18 // Bits used in the short match table - bestShortTableSize = 1 << bestShortTableBits // Size of the table - bestShortLen = 4 // Bytes used for table hash - -) - -type match struct { - offset int32 - s int32 - length int32 - rep int32 - est int32 -} - -const highScore = maxMatchLen * 8 - -// estBits will estimate output bits from predefined tables. -func (m *match) estBits(bitsPerByte int32) { - mlc := mlCode(uint32(m.length - zstdMinMatch)) - var ofc uint8 - if m.rep < 0 { - ofc = ofCode(uint32(m.s-m.offset) + 3) - } else { - ofc = ofCode(uint32(m.rep) & 3) - } - // Cost, excluding - ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc] - - // Add cost of match encoding... - m.est = int32(ofTT.outBits + mlTT.outBits) - m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16) - // Subtract savings compared to literal encoding... - m.est -= (m.length * bitsPerByte) >> 10 - if m.est > 0 { - // Unlikely gain.. - m.length = 0 - m.est = highScore - } -} - -// bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches. -// The long match table contains the previous entry with the same hash, -// effectively making it a "chain" of length 2. -// When we find a long match we choose between the two values and select the longest. -// When we find a short match, after checking the long, we check if we can find a long at n+1 -// and that it is longer (lazy matching). -type bestFastEncoder struct { - fastBase - table [bestShortTableSize]prevEntry - longTable [bestLongTableSize]prevEntry - dictTable []prevEntry - dictLongTable []prevEntry -} - -// Encode improves compression... -func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { - const ( - // Input margin is the number of bytes we read (8) - // and the maximum we will read ahead (2) - inputMargin = 8 + 4 - minNonLiteralBlockSize = 16 - ) - - // Protect against e.cur wraparound. - for e.cur >= e.bufferReset-int32(len(e.hist)) { - if len(e.hist) == 0 { - e.table = [bestShortTableSize]prevEntry{} - e.longTable = [bestLongTableSize]prevEntry{} - e.cur = e.maxMatchOff - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff - for i := range e.table[:] { - v := e.table[i].offset - v2 := e.table[i].prev - if v < minOff { - v = 0 - v2 = 0 - } else { - v = v - e.cur + e.maxMatchOff - if v2 < minOff { - v2 = 0 - } else { - v2 = v2 - e.cur + e.maxMatchOff - } - } - e.table[i] = prevEntry{ - offset: v, - prev: v2, - } - } - for i := range e.longTable[:] { - v := e.longTable[i].offset - v2 := e.longTable[i].prev - if v < minOff { - v = 0 - v2 = 0 - } else { - v = v - e.cur + e.maxMatchOff - if v2 < minOff { - v2 = 0 - } else { - v2 = v2 - e.cur + e.maxMatchOff - } - } - e.longTable[i] = prevEntry{ - offset: v, - prev: v2, - } - } - e.cur = e.maxMatchOff - break - } - - // Add block to history - s := e.addBlock(src) - blk.size = len(src) - - // Check RLE first - if len(src) > zstdMinMatch { - ml := matchLen(src[1:], src) - if ml == len(src)-1 { - blk.literals = append(blk.literals, src[0]) - blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3}) - return - } - } - - if len(src) < minNonLiteralBlockSize { - blk.extraLits = len(src) - blk.literals = blk.literals[:len(src)] - copy(blk.literals, src) - return - } - - // Use this to estimate literal cost. - // Scaled by 10 bits. - bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src)) - // Huffman can never go < 1 bit/byte - if bitsPerByte < 1024 { - bitsPerByte = 1024 - } - - // Override src - src = e.hist - sLimit := int32(len(src)) - inputMargin - const kSearchStrength = 10 - - // nextEmit is where in src the next emitLiteral should start from. - nextEmit := s - - // Relative offsets - offset1 := int32(blk.recentOffsets[0]) - offset2 := int32(blk.recentOffsets[1]) - offset3 := int32(blk.recentOffsets[2]) - - addLiterals := func(s *seq, until int32) { - if until == nextEmit { - return - } - blk.literals = append(blk.literals, src[nextEmit:until]...) - s.litLen = uint32(until - nextEmit) - } - - if debugEncoder { - println("recent offsets:", blk.recentOffsets) - } - -encodeLoop: - for { - // We allow the encoder to optionally turn off repeat offsets across blocks - canRepeat := len(blk.sequences) > 2 - - if debugAsserts && canRepeat && offset1 == 0 { - panic("offset0 was 0") - } - - const goodEnough = 250 - - cv := load6432(src, s) - - nextHashL := hashLen(cv, bestLongTableBits, bestLongLen) - nextHashS := hashLen(cv, bestShortTableBits, bestShortLen) - candidateL := e.longTable[nextHashL] - candidateS := e.table[nextHashS] - - // Set m to a match at offset if it looks like that will improve compression. - improve := func(m *match, offset int32, s int32, first uint32, rep int32) { - delta := s - offset - if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first { - return - } - // Try to quick reject if we already have a long match. - if m.length > 16 { - left := len(src) - int(m.s+m.length) - // If we are too close to the end, keep as is. - if left <= 0 { - return - } - checkLen := m.length - (s - m.s) - 8 - if left > 2 && checkLen > 4 { - // Check 4 bytes, 4 bytes from the end of the current match. - a := load3232(src, offset+checkLen) - b := load3232(src, s+checkLen) - if a != b { - return - } - } - } - l := 4 + e.matchlen(s+4, offset+4, src) - if m.rep <= 0 { - // Extend candidate match backwards as far as possible. - // Do not extend repeats as we can assume they are optimal - // and offsets change if s == nextEmit. - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength { - s-- - offset-- - l++ - } - } - if debugAsserts { - if offset >= s { - panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff)) - } - if !bytes.Equal(src[s:s+l], src[offset:offset+l]) { - panic(fmt.Sprintf("second match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first)) - } - } - cand := match{offset: offset, s: s, length: l, rep: rep} - cand.estBits(bitsPerByte) - if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 { - *m = cand - } - } - - best := match{s: s, est: highScore} - improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1) - improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1) - improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1) - improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1) - - if canRepeat && best.length < goodEnough { - if s == nextEmit { - // Check repeats straight after a match. - improve(&best, s-offset2, s, uint32(cv), 1|4) - improve(&best, s-offset3, s, uint32(cv), 2|4) - if offset1 > 1 { - improve(&best, s-(offset1-1), s, uint32(cv), 3|4) - } - } - - // If either no match or a non-repeat match, check at + 1 - if best.rep <= 0 { - cv32 := uint32(cv >> 8) - spp := s + 1 - improve(&best, spp-offset1, spp, cv32, 1) - improve(&best, spp-offset2, spp, cv32, 2) - improve(&best, spp-offset3, spp, cv32, 3) - if best.rep < 0 { - cv32 = uint32(cv >> 24) - spp += 2 - improve(&best, spp-offset1, spp, cv32, 1) - improve(&best, spp-offset2, spp, cv32, 2) - improve(&best, spp-offset3, spp, cv32, 3) - } - } - } - // Load next and check... - e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset} - e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset} - index0 := s + 1 - - // Look far ahead, unless we have a really long match already... - if best.length < goodEnough { - // No match found, move forward on input, no need to check forward... - if best.length < 4 { - s += 1 + (s-nextEmit)>>(kSearchStrength-1) - if s >= sLimit { - break encodeLoop - } - continue - } - - candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)] - cv = load6432(src, s+1) - cv2 := load6432(src, s+2) - candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)] - candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)] - - // Short at s+1 - improve(&best, candidateS.offset-e.cur, s+1, uint32(cv), -1) - // Long at s+1, s+2 - improve(&best, candidateL.offset-e.cur, s+1, uint32(cv), -1) - improve(&best, candidateL.prev-e.cur, s+1, uint32(cv), -1) - improve(&best, candidateL2.offset-e.cur, s+2, uint32(cv2), -1) - improve(&best, candidateL2.prev-e.cur, s+2, uint32(cv2), -1) - if false { - // Short at s+3. - // Too often worse... - improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+3, uint32(cv2>>8), -1) - } - - // Start check at a fixed offset to allow for a few mismatches. - // For this compression level 2 yields the best results. - // We cannot do this if we have already indexed this position. - const skipBeginning = 2 - if best.s > s-skipBeginning { - // See if we can find a better match by checking where the current best ends. - // Use that offset to see if we can find a better full match. - if sAt := best.s + best.length; sAt < sLimit { - nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen) - candidateEnd := e.longTable[nextHashL] - - if off := candidateEnd.offset - e.cur - best.length + skipBeginning; off >= 0 { - improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) - if off := candidateEnd.prev - e.cur - best.length + skipBeginning; off >= 0 { - improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) - } - } - } - } - } - - if debugAsserts { - if best.offset >= best.s { - panic(fmt.Sprintf("best.offset > s: %d >= %d", best.offset, best.s)) - } - if best.s < nextEmit { - panic(fmt.Sprintf("s %d < nextEmit %d", best.s, nextEmit)) - } - if best.offset < s-e.maxMatchOff { - panic(fmt.Sprintf("best.offset < s-e.maxMatchOff: %d < %d", best.offset, s-e.maxMatchOff)) - } - if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) { - panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length])) - } - } - - // We have a match, we can store the forward value - s = best.s - if best.rep > 0 { - var seq seq - seq.matchLen = uint32(best.length - zstdMinMatch) - addLiterals(&seq, best.s) - - // Repeat. If bit 4 is set, this is a non-lit repeat. - seq.offset = uint32(best.rep & 3) - if debugSequences { - println("repeat sequence", seq, "next s:", best.s, "off:", best.s-best.offset) - } - blk.sequences = append(blk.sequences, seq) - - // Index old s + 1 -> s - 1 - s = best.s + best.length - nextEmit = s - - // Index skipped... - end := s - if s > sLimit+4 { - end = sLimit + 4 - } - off := index0 + e.cur - for index0 < end { - cv0 := load6432(src, index0) - h0 := hashLen(cv0, bestLongTableBits, bestLongLen) - h1 := hashLen(cv0, bestShortTableBits, bestShortLen) - e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} - off++ - index0++ - } - - switch best.rep { - case 2, 4 | 1: - offset1, offset2 = offset2, offset1 - case 3, 4 | 2: - offset1, offset2, offset3 = offset3, offset1, offset2 - case 4 | 3: - offset1, offset2, offset3 = offset1-1, offset1, offset2 - } - if s >= sLimit { - if debugEncoder { - println("repeat ended", s, best.length) - } - break encodeLoop - } - continue - } - - // A 4-byte match has been found. Update recent offsets. - // We'll later see if more than 4 bytes. - t := best.offset - offset1, offset2, offset3 = s-t, offset1, offset2 - - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - - if debugAsserts && int(offset1) > len(src) { - panic("invalid offset") - } - - // Write our sequence - var seq seq - l := best.length - seq.litLen = uint32(s - nextEmit) - seq.matchLen = uint32(l - zstdMinMatch) - if seq.litLen > 0 { - blk.literals = append(blk.literals, src[nextEmit:s]...) - } - seq.offset = uint32(s-t) + 3 - s += l - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - nextEmit = s - - // Index old s + 1 -> s - 1 or sLimit - end := s - if s > sLimit-4 { - end = sLimit - 4 - } - - off := index0 + e.cur - for index0 < end { - cv0 := load6432(src, index0) - h0 := hashLen(cv0, bestLongTableBits, bestLongLen) - h1 := hashLen(cv0, bestShortTableBits, bestShortLen) - e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} - index0++ - off++ - } - if s >= sLimit { - break encodeLoop - } - } - - if int(nextEmit) < len(src) { - blk.literals = append(blk.literals, src[nextEmit:]...) - blk.extraLits = len(src) - int(nextEmit) - } - blk.recentOffsets[0] = uint32(offset1) - blk.recentOffsets[1] = uint32(offset2) - blk.recentOffsets[2] = uint32(offset3) - if debugEncoder { - println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) - } -} - -// EncodeNoHist will encode a block with no history and no following blocks. -// Most notable difference is that src will not be copied for history and -// we do not need to check for max match length. -func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { - e.ensureHist(len(src)) - e.Encode(blk, src) -} - -// Reset will reset and set a dictionary if not nil -func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) { - e.resetBase(d, singleBlock) - if d == nil { - return - } - // Init or copy dict table - if len(e.dictTable) != len(e.table) || d.id != e.lastDictID { - if len(e.dictTable) != len(e.table) { - e.dictTable = make([]prevEntry, len(e.table)) - } - end := int32(len(d.content)) - 8 + e.maxMatchOff - for i := e.maxMatchOff; i < end; i += 4 { - const hashLog = bestShortTableBits - - cv := load6432(d.content, i-e.maxMatchOff) - nextHash := hashLen(cv, hashLog, bestShortLen) // 0 -> 4 - nextHash1 := hashLen(cv>>8, hashLog, bestShortLen) // 1 -> 5 - nextHash2 := hashLen(cv>>16, hashLog, bestShortLen) // 2 -> 6 - nextHash3 := hashLen(cv>>24, hashLog, bestShortLen) // 3 -> 7 - e.dictTable[nextHash] = prevEntry{ - prev: e.dictTable[nextHash].offset, - offset: i, - } - e.dictTable[nextHash1] = prevEntry{ - prev: e.dictTable[nextHash1].offset, - offset: i + 1, - } - e.dictTable[nextHash2] = prevEntry{ - prev: e.dictTable[nextHash2].offset, - offset: i + 2, - } - e.dictTable[nextHash3] = prevEntry{ - prev: e.dictTable[nextHash3].offset, - offset: i + 3, - } - } - e.lastDictID = d.id - } - - // Init or copy dict table - if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID { - if len(e.dictLongTable) != len(e.longTable) { - e.dictLongTable = make([]prevEntry, len(e.longTable)) - } - if len(d.content) >= 8 { - cv := load6432(d.content, 0) - h := hashLen(cv, bestLongTableBits, bestLongLen) - e.dictLongTable[h] = prevEntry{ - offset: e.maxMatchOff, - prev: e.dictLongTable[h].offset, - } - - end := int32(len(d.content)) - 8 + e.maxMatchOff - off := 8 // First to read - for i := e.maxMatchOff + 1; i < end; i++ { - cv = cv>>8 | (uint64(d.content[off]) << 56) - h := hashLen(cv, bestLongTableBits, bestLongLen) - e.dictLongTable[h] = prevEntry{ - offset: i, - prev: e.dictLongTable[h].offset, - } - off++ - } - } - e.lastDictID = d.id - } - // Reset table to initial state - copy(e.longTable[:], e.dictLongTable) - - e.cur = e.maxMatchOff - // Reset table to initial state - copy(e.table[:], e.dictTable) -} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go deleted file mode 100644 index 84a79fde7..000000000 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ /dev/null @@ -1,1252 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import "fmt" - -const ( - betterLongTableBits = 19 // Bits used in the long match table - betterLongTableSize = 1 << betterLongTableBits // Size of the table - betterLongLen = 8 // Bytes used for table hash - - // Note: Increasing the short table bits or making the hash shorter - // can actually lead to compression degradation since it will 'steal' more from the - // long match table and match offsets are quite big. - // This greatly depends on the type of input. - betterShortTableBits = 13 // Bits used in the short match table - betterShortTableSize = 1 << betterShortTableBits // Size of the table - betterShortLen = 5 // Bytes used for table hash - - betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits) // Number of shards in the table - betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard - - betterShortTableShardCnt = 1 << (betterShortTableBits - dictShardBits) // Number of shards in the table - betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard -) - -type prevEntry struct { - offset int32 - prev int32 -} - -// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches. -// The long match table contains the previous entry with the same hash, -// effectively making it a "chain" of length 2. -// When we find a long match we choose between the two values and select the longest. -// When we find a short match, after checking the long, we check if we can find a long at n+1 -// and that it is longer (lazy matching). -type betterFastEncoder struct { - fastBase - table [betterShortTableSize]tableEntry - longTable [betterLongTableSize]prevEntry -} - -type betterFastEncoderDict struct { - betterFastEncoder - dictTable []tableEntry - dictLongTable []prevEntry - shortTableShardDirty [betterShortTableShardCnt]bool - longTableShardDirty [betterLongTableShardCnt]bool - allDirty bool -} - -// Encode improves compression... -func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { - const ( - // Input margin is the number of bytes we read (8) - // and the maximum we will read ahead (2) - inputMargin = 8 + 2 - minNonLiteralBlockSize = 16 - ) - - // Protect against e.cur wraparound. - for e.cur >= e.bufferReset-int32(len(e.hist)) { - if len(e.hist) == 0 { - e.table = [betterShortTableSize]tableEntry{} - e.longTable = [betterLongTableSize]prevEntry{} - e.cur = e.maxMatchOff - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff - for i := range e.table[:] { - v := e.table[i].offset - if v < minOff { - v = 0 - } else { - v = v - e.cur + e.maxMatchOff - } - e.table[i].offset = v - } - for i := range e.longTable[:] { - v := e.longTable[i].offset - v2 := e.longTable[i].prev - if v < minOff { - v = 0 - v2 = 0 - } else { - v = v - e.cur + e.maxMatchOff - if v2 < minOff { - v2 = 0 - } else { - v2 = v2 - e.cur + e.maxMatchOff - } - } - e.longTable[i] = prevEntry{ - offset: v, - prev: v2, - } - } - e.cur = e.maxMatchOff - break - } - // Add block to history - s := e.addBlock(src) - blk.size = len(src) - - // Check RLE first - if len(src) > zstdMinMatch { - ml := matchLen(src[1:], src) - if ml == len(src)-1 { - blk.literals = append(blk.literals, src[0]) - blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3}) - return - } - } - - if len(src) < minNonLiteralBlockSize { - blk.extraLits = len(src) - blk.literals = blk.literals[:len(src)] - copy(blk.literals, src) - return - } - - // Override src - src = e.hist - sLimit := int32(len(src)) - inputMargin - // stepSize is the number of bytes to skip on every main loop iteration. - // It should be >= 1. - const stepSize = 1 - - const kSearchStrength = 9 - - // nextEmit is where in src the next emitLiteral should start from. - nextEmit := s - cv := load6432(src, s) - - // Relative offsets - offset1 := int32(blk.recentOffsets[0]) - offset2 := int32(blk.recentOffsets[1]) - - addLiterals := func(s *seq, until int32) { - if until == nextEmit { - return - } - blk.literals = append(blk.literals, src[nextEmit:until]...) - s.litLen = uint32(until - nextEmit) - } - if debugEncoder { - println("recent offsets:", blk.recentOffsets) - } - -encodeLoop: - for { - var t int32 - // We allow the encoder to optionally turn off repeat offsets across blocks - canRepeat := len(blk.sequences) > 2 - var matched, index0 int32 - - for { - if debugAsserts && canRepeat && offset1 == 0 { - panic("offset0 was 0") - } - - nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) - nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) - candidateL := e.longTable[nextHashL] - candidateS := e.table[nextHashS] - - const repOff = 1 - repIndex := s - offset1 + repOff - off := s + e.cur - e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} - e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} - index0 = s + 1 - - if canRepeat { - if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { - // Consider history as well. - var seq seq - length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - - seq.matchLen = uint32(length - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + repOff - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 0 - seq.offset = 1 - if debugSequences { - println("repeat sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - - // Index match start+1 (long) -> s - 1 - index0 := s + repOff - s += length + repOff - - nextEmit = s - if s >= sLimit { - if debugEncoder { - println("repeat ended", s, length) - - } - break encodeLoop - } - // Index skipped... - for index0 < s-1 { - cv0 := load6432(src, index0) - cv1 := cv0 >> 8 - h0 := hashLen(cv0, betterLongTableBits, betterLongLen) - off := index0 + e.cur - e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)} - index0 += 2 - } - cv = load6432(src, s) - continue - } - const repOff2 = 1 - - // We deviate from the reference encoder and also check offset 2. - // Still slower and not much better, so disabled. - // repIndex = s - offset2 + repOff2 - if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { - // Consider history as well. - var seq seq - length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) - - seq.matchLen = uint32(length - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + repOff2 - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 2 - seq.offset = 2 - if debugSequences { - println("repeat sequence 2", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - - s += length + repOff2 - nextEmit = s - if s >= sLimit { - if debugEncoder { - println("repeat ended", s, length) - - } - break encodeLoop - } - - // Index skipped... - for index0 < s-1 { - cv0 := load6432(src, index0) - cv1 := cv0 >> 8 - h0 := hashLen(cv0, betterLongTableBits, betterLongLen) - off := index0 + e.cur - e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)} - index0 += 2 - } - cv = load6432(src, s) - // Swap offsets - offset1, offset2 = offset2, offset1 - continue - } - } - // Find the offsets of our two matches. - coffsetL := candidateL.offset - e.cur - coffsetLP := candidateL.prev - e.cur - - // Check if we have a long match. - if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { - // Found a long match, at least 8 bytes. - matched = e.matchlen(s+8, coffsetL+8, src) + 8 - t = coffsetL - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugMatches { - println("long match") - } - - if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { - // Found a long match, at least 8 bytes. - prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8 - if prevMatch > matched { - matched = prevMatch - t = coffsetLP - } - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugMatches { - println("long match") - } - } - break - } - - // Check if we have a long match on prev. - if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { - // Found a long match, at least 8 bytes. - matched = e.matchlen(s+8, coffsetLP+8, src) + 8 - t = coffsetLP - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugMatches { - println("long match") - } - break - } - - coffsetS := candidateS.offset - e.cur - - // Check if we have a short match. - if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { - // found a regular match - matched = e.matchlen(s+4, coffsetS+4, src) + 4 - - // See if we can find a long match at s+1 - const checkAt = 1 - cv := load6432(src, s+checkAt) - nextHashL = hashLen(cv, betterLongTableBits, betterLongLen) - candidateL = e.longTable[nextHashL] - coffsetL = candidateL.offset - e.cur - - // We can store it, since we have at least a 4 byte match. - e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset} - if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { - // Found a long match, at least 8 bytes. - matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 - if matchedNext > matched { - t = coffsetL - s += checkAt - matched = matchedNext - if debugMatches { - println("long match (after short)") - } - break - } - } - - // Check prev long... - coffsetL = candidateL.prev - e.cur - if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { - // Found a long match, at least 8 bytes. - matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 - if matchedNext > matched { - t = coffsetL - s += checkAt - matched = matchedNext - if debugMatches { - println("prev long match (after short)") - } - break - } - } - t = coffsetS - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugAsserts && t < 0 { - panic("t<0") - } - if debugMatches { - println("short match") - } - break - } - - // No match found, move forward in input. - s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) - if s >= sLimit { - break encodeLoop - } - cv = load6432(src, s) - } - - // Try to find a better match by searching for a long match at the end of the current best match - if s+matched < sLimit { - // Allow some bytes at the beginning to mismatch. - // Sweet spot is around 3 bytes, but depends on input. - // The skipped bytes are tested in Extend backwards, - // and still picked up as part of the match if they do. - const skipBeginning = 3 - - nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen) - s2 := s + skipBeginning - cv := load3232(src, s2) - candidateL := e.longTable[nextHashL] - coffsetL := candidateL.offset - e.cur - matched + skipBeginning - if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { - // Found a long match, at least 4 bytes. - matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4 - if matchedNext > matched { - t = coffsetL - s = s2 - matched = matchedNext - if debugMatches { - println("long match at end-of-match") - } - } - } - - // Check prev long... - if true { - coffsetL = candidateL.prev - e.cur - matched + skipBeginning - if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { - // Found a long match, at least 4 bytes. - matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4 - if matchedNext > matched { - t = coffsetL - s = s2 - matched = matchedNext - if debugMatches { - println("prev long match at end-of-match") - } - } - } - } - } - // A match has been found. Update recent offsets. - offset2 = offset1 - offset1 = s - t - - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - - if debugAsserts && canRepeat && int(offset1) > len(src) { - panic("invalid offset") - } - - // Extend the n-byte match as long as possible. - l := matched - - // Extend backwards - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { - s-- - t-- - l++ - } - - // Write our sequence - var seq seq - seq.litLen = uint32(s - nextEmit) - seq.matchLen = uint32(l - zstdMinMatch) - if seq.litLen > 0 { - blk.literals = append(blk.literals, src[nextEmit:s]...) - } - seq.offset = uint32(s-t) + 3 - s += l - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - nextEmit = s - if s >= sLimit { - break encodeLoop - } - - // Index match start+1 (long) -> s - 1 - off := index0 + e.cur - for index0 < s-1 { - cv0 := load6432(src, index0) - cv1 := cv0 >> 8 - h0 := hashLen(cv0, betterLongTableBits, betterLongLen) - e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)} - index0 += 2 - off += 2 - } - - cv = load6432(src, s) - if !canRepeat { - continue - } - - // Check offset 2 - for { - o2 := s - offset2 - if load3232(src, o2) != uint32(cv) { - // Do regular search - break - } - - // Store this, since we have it. - nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) - nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) - - // We have at least 4 byte match. - // No need to check backwards. We come straight from a match - l := 4 + e.matchlen(s+4, o2+4, src) - - e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} - e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)} - seq.matchLen = uint32(l) - zstdMinMatch - seq.litLen = 0 - - // Since litlen is always 0, this is offset 1. - seq.offset = 1 - s += l - nextEmit = s - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - - // Swap offset 1 and 2. - offset1, offset2 = offset2, offset1 - if s >= sLimit { - // Finished - break encodeLoop - } - cv = load6432(src, s) - } - } - - if int(nextEmit) < len(src) { - blk.literals = append(blk.literals, src[nextEmit:]...) - blk.extraLits = len(src) - int(nextEmit) - } - blk.recentOffsets[0] = uint32(offset1) - blk.recentOffsets[1] = uint32(offset2) - if debugEncoder { - println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) - } -} - -// EncodeNoHist will encode a block with no history and no following blocks. -// Most notable difference is that src will not be copied for history and -// we do not need to check for max match length. -func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { - e.ensureHist(len(src)) - e.Encode(blk, src) -} - -// Encode improves compression... -func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) { - const ( - // Input margin is the number of bytes we read (8) - // and the maximum we will read ahead (2) - inputMargin = 8 + 2 - minNonLiteralBlockSize = 16 - ) - - // Protect against e.cur wraparound. - for e.cur >= e.bufferReset-int32(len(e.hist)) { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.longTable[:] { - e.longTable[i] = prevEntry{} - } - e.cur = e.maxMatchOff - e.allDirty = true - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff - for i := range e.table[:] { - v := e.table[i].offset - if v < minOff { - v = 0 - } else { - v = v - e.cur + e.maxMatchOff - } - e.table[i].offset = v - } - for i := range e.longTable[:] { - v := e.longTable[i].offset - v2 := e.longTable[i].prev - if v < minOff { - v = 0 - v2 = 0 - } else { - v = v - e.cur + e.maxMatchOff - if v2 < minOff { - v2 = 0 - } else { - v2 = v2 - e.cur + e.maxMatchOff - } - } - e.longTable[i] = prevEntry{ - offset: v, - prev: v2, - } - } - e.allDirty = true - e.cur = e.maxMatchOff - break - } - - s := e.addBlock(src) - blk.size = len(src) - if len(src) < minNonLiteralBlockSize { - blk.extraLits = len(src) - blk.literals = blk.literals[:len(src)] - copy(blk.literals, src) - return - } - - // Override src - src = e.hist - sLimit := int32(len(src)) - inputMargin - // stepSize is the number of bytes to skip on every main loop iteration. - // It should be >= 1. - const stepSize = 1 - - const kSearchStrength = 9 - - // nextEmit is where in src the next emitLiteral should start from. - nextEmit := s - cv := load6432(src, s) - - // Relative offsets - offset1 := int32(blk.recentOffsets[0]) - offset2 := int32(blk.recentOffsets[1]) - - addLiterals := func(s *seq, until int32) { - if until == nextEmit { - return - } - blk.literals = append(blk.literals, src[nextEmit:until]...) - s.litLen = uint32(until - nextEmit) - } - if debugEncoder { - println("recent offsets:", blk.recentOffsets) - } - -encodeLoop: - for { - var t int32 - // We allow the encoder to optionally turn off repeat offsets across blocks - canRepeat := len(blk.sequences) > 2 - var matched, index0 int32 - - for { - if debugAsserts && canRepeat && offset1 == 0 { - panic("offset0 was 0") - } - - nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) - nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) - candidateL := e.longTable[nextHashL] - candidateS := e.table[nextHashS] - - const repOff = 1 - repIndex := s - offset1 + repOff - off := s + e.cur - e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} - e.markLongShardDirty(nextHashL) - e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} - e.markShortShardDirty(nextHashS) - index0 = s + 1 - - if canRepeat { - if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { - // Consider history as well. - var seq seq - length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - - seq.matchLen = uint32(length - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + repOff - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 0 - seq.offset = 1 - if debugSequences { - println("repeat sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - - // Index match start+1 (long) -> s - 1 - s += length + repOff - - nextEmit = s - if s >= sLimit { - if debugEncoder { - println("repeat ended", s, length) - - } - break encodeLoop - } - // Index skipped... - for index0 < s-1 { - cv0 := load6432(src, index0) - cv1 := cv0 >> 8 - h0 := hashLen(cv0, betterLongTableBits, betterLongLen) - off := index0 + e.cur - e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.markLongShardDirty(h0) - h1 := hashLen(cv1, betterShortTableBits, betterShortLen) - e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} - e.markShortShardDirty(h1) - index0 += 2 - } - cv = load6432(src, s) - continue - } - const repOff2 = 1 - - // We deviate from the reference encoder and also check offset 2. - // Still slower and not much better, so disabled. - // repIndex = s - offset2 + repOff2 - if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { - // Consider history as well. - var seq seq - length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) - - seq.matchLen = uint32(length - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + repOff2 - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 2 - seq.offset = 2 - if debugSequences { - println("repeat sequence 2", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - - s += length + repOff2 - nextEmit = s - if s >= sLimit { - if debugEncoder { - println("repeat ended", s, length) - - } - break encodeLoop - } - - // Index skipped... - for index0 < s-1 { - cv0 := load6432(src, index0) - cv1 := cv0 >> 8 - h0 := hashLen(cv0, betterLongTableBits, betterLongLen) - off := index0 + e.cur - e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.markLongShardDirty(h0) - h1 := hashLen(cv1, betterShortTableBits, betterShortLen) - e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} - e.markShortShardDirty(h1) - index0 += 2 - } - cv = load6432(src, s) - // Swap offsets - offset1, offset2 = offset2, offset1 - continue - } - } - // Find the offsets of our two matches. - coffsetL := candidateL.offset - e.cur - coffsetLP := candidateL.prev - e.cur - - // Check if we have a long match. - if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { - // Found a long match, at least 8 bytes. - matched = e.matchlen(s+8, coffsetL+8, src) + 8 - t = coffsetL - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugMatches { - println("long match") - } - - if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { - // Found a long match, at least 8 bytes. - prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8 - if prevMatch > matched { - matched = prevMatch - t = coffsetLP - } - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugMatches { - println("long match") - } - } - break - } - - // Check if we have a long match on prev. - if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { - // Found a long match, at least 8 bytes. - matched = e.matchlen(s+8, coffsetLP+8, src) + 8 - t = coffsetLP - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugMatches { - println("long match") - } - break - } - - coffsetS := candidateS.offset - e.cur - - // Check if we have a short match. - if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { - // found a regular match - matched = e.matchlen(s+4, coffsetS+4, src) + 4 - - // See if we can find a long match at s+1 - const checkAt = 1 - cv := load6432(src, s+checkAt) - nextHashL = hashLen(cv, betterLongTableBits, betterLongLen) - candidateL = e.longTable[nextHashL] - coffsetL = candidateL.offset - e.cur - - // We can store it, since we have at least a 4 byte match. - e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset} - e.markLongShardDirty(nextHashL) - if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { - // Found a long match, at least 8 bytes. - matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 - if matchedNext > matched { - t = coffsetL - s += checkAt - matched = matchedNext - if debugMatches { - println("long match (after short)") - } - break - } - } - - // Check prev long... - coffsetL = candidateL.prev - e.cur - if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { - // Found a long match, at least 8 bytes. - matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 - if matchedNext > matched { - t = coffsetL - s += checkAt - matched = matchedNext - if debugMatches { - println("prev long match (after short)") - } - break - } - } - t = coffsetS - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugAsserts && t < 0 { - panic("t<0") - } - if debugMatches { - println("short match") - } - break - } - - // No match found, move forward in input. - s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) - if s >= sLimit { - break encodeLoop - } - cv = load6432(src, s) - } - // Try to find a better match by searching for a long match at the end of the current best match - if s+matched < sLimit { - nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen) - cv := load3232(src, s) - candidateL := e.longTable[nextHashL] - coffsetL := candidateL.offset - e.cur - matched - if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { - // Found a long match, at least 4 bytes. - matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 - if matchedNext > matched { - t = coffsetL - matched = matchedNext - if debugMatches { - println("long match at end-of-match") - } - } - } - - // Check prev long... - if true { - coffsetL = candidateL.prev - e.cur - matched - if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { - // Found a long match, at least 4 bytes. - matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 - if matchedNext > matched { - t = coffsetL - matched = matchedNext - if debugMatches { - println("prev long match at end-of-match") - } - } - } - } - } - // A match has been found. Update recent offsets. - offset2 = offset1 - offset1 = s - t - - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - - if debugAsserts && canRepeat && int(offset1) > len(src) { - panic("invalid offset") - } - - // Extend the n-byte match as long as possible. - l := matched - - // Extend backwards - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { - s-- - t-- - l++ - } - - // Write our sequence - var seq seq - seq.litLen = uint32(s - nextEmit) - seq.matchLen = uint32(l - zstdMinMatch) - if seq.litLen > 0 { - blk.literals = append(blk.literals, src[nextEmit:s]...) - } - seq.offset = uint32(s-t) + 3 - s += l - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - nextEmit = s - if s >= sLimit { - break encodeLoop - } - - // Index match start+1 (long) -> s - 1 - off := index0 + e.cur - for index0 < s-1 { - cv0 := load6432(src, index0) - cv1 := cv0 >> 8 - h0 := hashLen(cv0, betterLongTableBits, betterLongLen) - e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.markLongShardDirty(h0) - h1 := hashLen(cv1, betterShortTableBits, betterShortLen) - e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} - e.markShortShardDirty(h1) - index0 += 2 - off += 2 - } - - cv = load6432(src, s) - if !canRepeat { - continue - } - - // Check offset 2 - for { - o2 := s - offset2 - if load3232(src, o2) != uint32(cv) { - // Do regular search - break - } - - // Store this, since we have it. - nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) - nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) - - // We have at least 4 byte match. - // No need to check backwards. We come straight from a match - l := 4 + e.matchlen(s+4, o2+4, src) - - e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} - e.markLongShardDirty(nextHashL) - e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)} - e.markShortShardDirty(nextHashS) - seq.matchLen = uint32(l) - zstdMinMatch - seq.litLen = 0 - - // Since litlen is always 0, this is offset 1. - seq.offset = 1 - s += l - nextEmit = s - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - - // Swap offset 1 and 2. - offset1, offset2 = offset2, offset1 - if s >= sLimit { - // Finished - break encodeLoop - } - cv = load6432(src, s) - } - } - - if int(nextEmit) < len(src) { - blk.literals = append(blk.literals, src[nextEmit:]...) - blk.extraLits = len(src) - int(nextEmit) - } - blk.recentOffsets[0] = uint32(offset1) - blk.recentOffsets[1] = uint32(offset2) - if debugEncoder { - println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) - } -} - -// ResetDict will reset and set a dictionary if not nil -func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) { - e.resetBase(d, singleBlock) - if d != nil { - panic("betterFastEncoder: Reset with dict") - } -} - -// ResetDict will reset and set a dictionary if not nil -func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) { - e.resetBase(d, singleBlock) - if d == nil { - return - } - // Init or copy dict table - if len(e.dictTable) != len(e.table) || d.id != e.lastDictID { - if len(e.dictTable) != len(e.table) { - e.dictTable = make([]tableEntry, len(e.table)) - } - end := int32(len(d.content)) - 8 + e.maxMatchOff - for i := e.maxMatchOff; i < end; i += 4 { - const hashLog = betterShortTableBits - - cv := load6432(d.content, i-e.maxMatchOff) - nextHash := hashLen(cv, hashLog, betterShortLen) // 0 -> 4 - nextHash1 := hashLen(cv>>8, hashLog, betterShortLen) // 1 -> 5 - nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6 - nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7 - e.dictTable[nextHash] = tableEntry{ - val: uint32(cv), - offset: i, - } - e.dictTable[nextHash1] = tableEntry{ - val: uint32(cv >> 8), - offset: i + 1, - } - e.dictTable[nextHash2] = tableEntry{ - val: uint32(cv >> 16), - offset: i + 2, - } - e.dictTable[nextHash3] = tableEntry{ - val: uint32(cv >> 24), - offset: i + 3, - } - } - e.lastDictID = d.id - e.allDirty = true - } - - // Init or copy dict table - if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID { - if len(e.dictLongTable) != len(e.longTable) { - e.dictLongTable = make([]prevEntry, len(e.longTable)) - } - if len(d.content) >= 8 { - cv := load6432(d.content, 0) - h := hashLen(cv, betterLongTableBits, betterLongLen) - e.dictLongTable[h] = prevEntry{ - offset: e.maxMatchOff, - prev: e.dictLongTable[h].offset, - } - - end := int32(len(d.content)) - 8 + e.maxMatchOff - off := 8 // First to read - for i := e.maxMatchOff + 1; i < end; i++ { - cv = cv>>8 | (uint64(d.content[off]) << 56) - h := hashLen(cv, betterLongTableBits, betterLongLen) - e.dictLongTable[h] = prevEntry{ - offset: i, - prev: e.dictLongTable[h].offset, - } - off++ - } - } - e.lastDictID = d.id - e.allDirty = true - } - - // Reset table to initial state - { - dirtyShardCnt := 0 - if !e.allDirty { - for i := range e.shortTableShardDirty { - if e.shortTableShardDirty[i] { - dirtyShardCnt++ - } - } - } - const shardCnt = betterShortTableShardCnt - const shardSize = betterShortTableShardSize - if e.allDirty || dirtyShardCnt > shardCnt*4/6 { - copy(e.table[:], e.dictTable) - for i := range e.shortTableShardDirty { - e.shortTableShardDirty[i] = false - } - } else { - for i := range e.shortTableShardDirty { - if !e.shortTableShardDirty[i] { - continue - } - - copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize]) - e.shortTableShardDirty[i] = false - } - } - } - { - dirtyShardCnt := 0 - if !e.allDirty { - for i := range e.shortTableShardDirty { - if e.shortTableShardDirty[i] { - dirtyShardCnt++ - } - } - } - const shardCnt = betterLongTableShardCnt - const shardSize = betterLongTableShardSize - if e.allDirty || dirtyShardCnt > shardCnt*4/6 { - copy(e.longTable[:], e.dictLongTable) - for i := range e.longTableShardDirty { - e.longTableShardDirty[i] = false - } - } else { - for i := range e.longTableShardDirty { - if !e.longTableShardDirty[i] { - continue - } - - copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize]) - e.longTableShardDirty[i] = false - } - } - } - e.cur = e.maxMatchOff - e.allDirty = false -} - -func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) { - e.longTableShardDirty[entryNum/betterLongTableShardSize] = true -} - -func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) { - e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true -} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go deleted file mode 100644 index d36be7bd8..000000000 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ /dev/null @@ -1,1123 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import "fmt" - -const ( - dFastLongTableBits = 17 // Bits used in the long match table - dFastLongTableSize = 1 << dFastLongTableBits // Size of the table - dFastLongTableMask = dFastLongTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. - dFastLongLen = 8 // Bytes used for table hash - - dLongTableShardCnt = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table - dLongTableShardSize = dFastLongTableSize / tableShardCnt // Size of an individual shard - - dFastShortTableBits = tableBits // Bits used in the short match table - dFastShortTableSize = 1 << dFastShortTableBits // Size of the table - dFastShortTableMask = dFastShortTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. - dFastShortLen = 5 // Bytes used for table hash - -) - -type doubleFastEncoder struct { - fastEncoder - longTable [dFastLongTableSize]tableEntry -} - -type doubleFastEncoderDict struct { - fastEncoderDict - longTable [dFastLongTableSize]tableEntry - dictLongTable []tableEntry - longTableShardDirty [dLongTableShardCnt]bool -} - -// Encode mimmics functionality in zstd_dfast.c -func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) { - const ( - // Input margin is the number of bytes we read (8) - // and the maximum we will read ahead (2) - inputMargin = 8 + 2 - minNonLiteralBlockSize = 16 - ) - - // Protect against e.cur wraparound. - for e.cur >= e.bufferReset-int32(len(e.hist)) { - if len(e.hist) == 0 { - e.table = [dFastShortTableSize]tableEntry{} - e.longTable = [dFastLongTableSize]tableEntry{} - e.cur = e.maxMatchOff - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff - for i := range e.table[:] { - v := e.table[i].offset - if v < minOff { - v = 0 - } else { - v = v - e.cur + e.maxMatchOff - } - e.table[i].offset = v - } - for i := range e.longTable[:] { - v := e.longTable[i].offset - if v < minOff { - v = 0 - } else { - v = v - e.cur + e.maxMatchOff - } - e.longTable[i].offset = v - } - e.cur = e.maxMatchOff - break - } - - s := e.addBlock(src) - blk.size = len(src) - if len(src) < minNonLiteralBlockSize { - blk.extraLits = len(src) - blk.literals = blk.literals[:len(src)] - copy(blk.literals, src) - return - } - - // Override src - src = e.hist - sLimit := int32(len(src)) - inputMargin - // stepSize is the number of bytes to skip on every main loop iteration. - // It should be >= 1. - const stepSize = 1 - - const kSearchStrength = 8 - - // nextEmit is where in src the next emitLiteral should start from. - nextEmit := s - cv := load6432(src, s) - - // Relative offsets - offset1 := int32(blk.recentOffsets[0]) - offset2 := int32(blk.recentOffsets[1]) - - addLiterals := func(s *seq, until int32) { - if until == nextEmit { - return - } - blk.literals = append(blk.literals, src[nextEmit:until]...) - s.litLen = uint32(until - nextEmit) - } - if debugEncoder { - println("recent offsets:", blk.recentOffsets) - } - -encodeLoop: - for { - var t int32 - // We allow the encoder to optionally turn off repeat offsets across blocks - canRepeat := len(blk.sequences) > 2 - - for { - if debugAsserts && canRepeat && offset1 == 0 { - panic("offset0 was 0") - } - - nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) - nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) - candidateL := e.longTable[nextHashL] - candidateS := e.table[nextHashS] - - const repOff = 1 - repIndex := s - offset1 + repOff - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} - e.longTable[nextHashL] = entry - e.table[nextHashS] = entry - - if canRepeat { - if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { - // Consider history as well. - var seq seq - length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - - seq.matchLen = uint32(length - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + repOff - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 0 - seq.offset = 1 - if debugSequences { - println("repeat sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - s += length + repOff - nextEmit = s - if s >= sLimit { - if debugEncoder { - println("repeat ended", s, length) - - } - break encodeLoop - } - cv = load6432(src, s) - continue - } - } - // Find the offsets of our two matches. - coffsetL := s - (candidateL.offset - e.cur) - coffsetS := s - (candidateS.offset - e.cur) - - // Check if we have a long match. - if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { - // Found a long match, likely at least 8 bytes. - // Reference encoder checks all 8 bytes, we only check 4, - // but the likelihood of both the first 4 bytes and the hash matching should be enough. - t = candidateL.offset - e.cur - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugMatches { - println("long match") - } - break - } - - // Check if we have a short match. - if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { - // found a regular match - // See if we can find a long match at s+1 - const checkAt = 1 - cv := load6432(src, s+checkAt) - nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen) - candidateL = e.longTable[nextHashL] - coffsetL = s - (candidateL.offset - e.cur) + checkAt - - // We can store it, since we have at least a 4 byte match. - e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)} - if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { - // Found a long match, likely at least 8 bytes. - // Reference encoder checks all 8 bytes, we only check 4, - // but the likelihood of both the first 4 bytes and the hash matching should be enough. - t = candidateL.offset - e.cur - s += checkAt - if debugMatches { - println("long match (after short)") - } - break - } - - t = candidateS.offset - e.cur - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugAsserts && t < 0 { - panic("t<0") - } - if debugMatches { - println("short match") - } - break - } - - // No match found, move forward in input. - s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) - if s >= sLimit { - break encodeLoop - } - cv = load6432(src, s) - } - - // A 4-byte match has been found. Update recent offsets. - // We'll later see if more than 4 bytes. - offset2 = offset1 - offset1 = s - t - - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - - if debugAsserts && canRepeat && int(offset1) > len(src) { - panic("invalid offset") - } - - // Extend the 4-byte match as long as possible. - l := e.matchlen(s+4, t+4, src) + 4 - - // Extend backwards - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { - s-- - t-- - l++ - } - - // Write our sequence - var seq seq - seq.litLen = uint32(s - nextEmit) - seq.matchLen = uint32(l - zstdMinMatch) - if seq.litLen > 0 { - blk.literals = append(blk.literals, src[nextEmit:s]...) - } - seq.offset = uint32(s-t) + 3 - s += l - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - nextEmit = s - if s >= sLimit { - break encodeLoop - } - - // Index match start+1 (long) and start+2 (short) - index0 := s - l + 1 - // Index match end-2 (long) and end-1 (short) - index1 := s - 2 - - cv0 := load6432(src, index0) - cv1 := load6432(src, index1) - te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} - te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} - e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0 - e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1 - cv0 >>= 8 - cv1 >>= 8 - te0.offset++ - te1.offset++ - te0.val = uint32(cv0) - te1.val = uint32(cv1) - e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0 - e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1 - - cv = load6432(src, s) - - if !canRepeat { - continue - } - - // Check offset 2 - for { - o2 := s - offset2 - if load3232(src, o2) != uint32(cv) { - // Do regular search - break - } - - // Store this, since we have it. - nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) - nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) - - // We have at least 4 byte match. - // No need to check backwards. We come straight from a match - l := 4 + e.matchlen(s+4, o2+4, src) - - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} - e.longTable[nextHashL] = entry - e.table[nextHashS] = entry - seq.matchLen = uint32(l) - zstdMinMatch - seq.litLen = 0 - - // Since litlen is always 0, this is offset 1. - seq.offset = 1 - s += l - nextEmit = s - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - - // Swap offset 1 and 2. - offset1, offset2 = offset2, offset1 - if s >= sLimit { - // Finished - break encodeLoop - } - cv = load6432(src, s) - } - } - - if int(nextEmit) < len(src) { - blk.literals = append(blk.literals, src[nextEmit:]...) - blk.extraLits = len(src) - int(nextEmit) - } - blk.recentOffsets[0] = uint32(offset1) - blk.recentOffsets[1] = uint32(offset2) - if debugEncoder { - println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) - } -} - -// EncodeNoHist will encode a block with no history and no following blocks. -// Most notable difference is that src will not be copied for history and -// we do not need to check for max match length. -func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { - const ( - // Input margin is the number of bytes we read (8) - // and the maximum we will read ahead (2) - inputMargin = 8 + 2 - minNonLiteralBlockSize = 16 - ) - - // Protect against e.cur wraparound. - if e.cur >= e.bufferReset { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.longTable[:] { - e.longTable[i] = tableEntry{} - } - e.cur = e.maxMatchOff - } - - s := int32(0) - blk.size = len(src) - if len(src) < minNonLiteralBlockSize { - blk.extraLits = len(src) - blk.literals = blk.literals[:len(src)] - copy(blk.literals, src) - return - } - - // Override src - sLimit := int32(len(src)) - inputMargin - // stepSize is the number of bytes to skip on every main loop iteration. - // It should be >= 1. - const stepSize = 1 - - const kSearchStrength = 8 - - // nextEmit is where in src the next emitLiteral should start from. - nextEmit := s - cv := load6432(src, s) - - // Relative offsets - offset1 := int32(blk.recentOffsets[0]) - offset2 := int32(blk.recentOffsets[1]) - - addLiterals := func(s *seq, until int32) { - if until == nextEmit { - return - } - blk.literals = append(blk.literals, src[nextEmit:until]...) - s.litLen = uint32(until - nextEmit) - } - if debugEncoder { - println("recent offsets:", blk.recentOffsets) - } - -encodeLoop: - for { - var t int32 - for { - - nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) - nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) - candidateL := e.longTable[nextHashL] - candidateS := e.table[nextHashS] - - const repOff = 1 - repIndex := s - offset1 + repOff - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} - e.longTable[nextHashL] = entry - e.table[nextHashS] = entry - - if len(blk.sequences) > 2 { - if load3232(src, repIndex) == uint32(cv>>(repOff*8)) { - // Consider history as well. - var seq seq - //length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - length := 4 + int32(matchLen(src[s+4+repOff:], src[repIndex+4:])) - - seq.matchLen = uint32(length - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + repOff - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 0 - seq.offset = 1 - if debugSequences { - println("repeat sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - s += length + repOff - nextEmit = s - if s >= sLimit { - if debugEncoder { - println("repeat ended", s, length) - - } - break encodeLoop - } - cv = load6432(src, s) - continue - } - } - // Find the offsets of our two matches. - coffsetL := s - (candidateL.offset - e.cur) - coffsetS := s - (candidateS.offset - e.cur) - - // Check if we have a long match. - if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { - // Found a long match, likely at least 8 bytes. - // Reference encoder checks all 8 bytes, we only check 4, - // but the likelihood of both the first 4 bytes and the hash matching should be enough. - t = candidateL.offset - e.cur - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d). cur: %d", s, t, e.cur)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugMatches { - println("long match") - } - break - } - - // Check if we have a short match. - if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { - // found a regular match - // See if we can find a long match at s+1 - const checkAt = 1 - cv := load6432(src, s+checkAt) - nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen) - candidateL = e.longTable[nextHashL] - coffsetL = s - (candidateL.offset - e.cur) + checkAt - - // We can store it, since we have at least a 4 byte match. - e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)} - if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { - // Found a long match, likely at least 8 bytes. - // Reference encoder checks all 8 bytes, we only check 4, - // but the likelihood of both the first 4 bytes and the hash matching should be enough. - t = candidateL.offset - e.cur - s += checkAt - if debugMatches { - println("long match (after short)") - } - break - } - - t = candidateS.offset - e.cur - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugAsserts && t < 0 { - panic("t<0") - } - if debugMatches { - println("short match") - } - break - } - - // No match found, move forward in input. - s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) - if s >= sLimit { - break encodeLoop - } - cv = load6432(src, s) - } - - // A 4-byte match has been found. Update recent offsets. - // We'll later see if more than 4 bytes. - offset2 = offset1 - offset1 = s - t - - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - - // Extend the 4-byte match as long as possible. - //l := e.matchlen(s+4, t+4, src) + 4 - l := int32(matchLen(src[s+4:], src[t+4:])) + 4 - - // Extend backwards - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for t > tMin && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - - // Write our sequence - var seq seq - seq.litLen = uint32(s - nextEmit) - seq.matchLen = uint32(l - zstdMinMatch) - if seq.litLen > 0 { - blk.literals = append(blk.literals, src[nextEmit:s]...) - } - seq.offset = uint32(s-t) + 3 - s += l - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - nextEmit = s - if s >= sLimit { - break encodeLoop - } - - // Index match start+1 (long) and start+2 (short) - index0 := s - l + 1 - // Index match end-2 (long) and end-1 (short) - index1 := s - 2 - - cv0 := load6432(src, index0) - cv1 := load6432(src, index1) - te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} - te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} - e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0 - e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1 - cv0 >>= 8 - cv1 >>= 8 - te0.offset++ - te1.offset++ - te0.val = uint32(cv0) - te1.val = uint32(cv1) - e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0 - e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1 - - cv = load6432(src, s) - - if len(blk.sequences) <= 2 { - continue - } - - // Check offset 2 - for { - o2 := s - offset2 - if load3232(src, o2) != uint32(cv) { - // Do regular search - break - } - - // Store this, since we have it. - nextHashS := hashLen(cv1>>8, dFastShortTableBits, dFastShortLen) - nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) - - // We have at least 4 byte match. - // No need to check backwards. We come straight from a match - //l := 4 + e.matchlen(s+4, o2+4, src) - l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) - - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} - e.longTable[nextHashL] = entry - e.table[nextHashS] = entry - seq.matchLen = uint32(l) - zstdMinMatch - seq.litLen = 0 - - // Since litlen is always 0, this is offset 1. - seq.offset = 1 - s += l - nextEmit = s - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - - // Swap offset 1 and 2. - offset1, offset2 = offset2, offset1 - if s >= sLimit { - // Finished - break encodeLoop - } - cv = load6432(src, s) - } - } - - if int(nextEmit) < len(src) { - blk.literals = append(blk.literals, src[nextEmit:]...) - blk.extraLits = len(src) - int(nextEmit) - } - if debugEncoder { - println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) - } - - // We do not store history, so we must offset e.cur to avoid false matches for next user. - if e.cur < e.bufferReset { - e.cur += int32(len(src)) - } -} - -// Encode will encode the content, with a dictionary if initialized for it. -func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) { - const ( - // Input margin is the number of bytes we read (8) - // and the maximum we will read ahead (2) - inputMargin = 8 + 2 - minNonLiteralBlockSize = 16 - ) - - // Protect against e.cur wraparound. - for e.cur >= e.bufferReset-int32(len(e.hist)) { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.longTable[:] { - e.longTable[i] = tableEntry{} - } - e.markAllShardsDirty() - e.cur = e.maxMatchOff - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff - for i := range e.table[:] { - v := e.table[i].offset - if v < minOff { - v = 0 - } else { - v = v - e.cur + e.maxMatchOff - } - e.table[i].offset = v - } - for i := range e.longTable[:] { - v := e.longTable[i].offset - if v < minOff { - v = 0 - } else { - v = v - e.cur + e.maxMatchOff - } - e.longTable[i].offset = v - } - e.markAllShardsDirty() - e.cur = e.maxMatchOff - break - } - - s := e.addBlock(src) - blk.size = len(src) - if len(src) < minNonLiteralBlockSize { - blk.extraLits = len(src) - blk.literals = blk.literals[:len(src)] - copy(blk.literals, src) - return - } - - // Override src - src = e.hist - sLimit := int32(len(src)) - inputMargin - // stepSize is the number of bytes to skip on every main loop iteration. - // It should be >= 1. - const stepSize = 1 - - const kSearchStrength = 8 - - // nextEmit is where in src the next emitLiteral should start from. - nextEmit := s - cv := load6432(src, s) - - // Relative offsets - offset1 := int32(blk.recentOffsets[0]) - offset2 := int32(blk.recentOffsets[1]) - - addLiterals := func(s *seq, until int32) { - if until == nextEmit { - return - } - blk.literals = append(blk.literals, src[nextEmit:until]...) - s.litLen = uint32(until - nextEmit) - } - if debugEncoder { - println("recent offsets:", blk.recentOffsets) - } - -encodeLoop: - for { - var t int32 - // We allow the encoder to optionally turn off repeat offsets across blocks - canRepeat := len(blk.sequences) > 2 - - for { - if debugAsserts && canRepeat && offset1 == 0 { - panic("offset0 was 0") - } - - nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) - nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) - candidateL := e.longTable[nextHashL] - candidateS := e.table[nextHashS] - - const repOff = 1 - repIndex := s - offset1 + repOff - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} - e.longTable[nextHashL] = entry - e.markLongShardDirty(nextHashL) - e.table[nextHashS] = entry - e.markShardDirty(nextHashS) - - if canRepeat { - if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { - // Consider history as well. - var seq seq - length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - - seq.matchLen = uint32(length - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + repOff - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 0 - seq.offset = 1 - if debugSequences { - println("repeat sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - s += length + repOff - nextEmit = s - if s >= sLimit { - if debugEncoder { - println("repeat ended", s, length) - - } - break encodeLoop - } - cv = load6432(src, s) - continue - } - } - // Find the offsets of our two matches. - coffsetL := s - (candidateL.offset - e.cur) - coffsetS := s - (candidateS.offset - e.cur) - - // Check if we have a long match. - if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { - // Found a long match, likely at least 8 bytes. - // Reference encoder checks all 8 bytes, we only check 4, - // but the likelihood of both the first 4 bytes and the hash matching should be enough. - t = candidateL.offset - e.cur - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugMatches { - println("long match") - } - break - } - - // Check if we have a short match. - if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { - // found a regular match - // See if we can find a long match at s+1 - const checkAt = 1 - cv := load6432(src, s+checkAt) - nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen) - candidateL = e.longTable[nextHashL] - coffsetL = s - (candidateL.offset - e.cur) + checkAt - - // We can store it, since we have at least a 4 byte match. - e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)} - e.markLongShardDirty(nextHashL) - if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { - // Found a long match, likely at least 8 bytes. - // Reference encoder checks all 8 bytes, we only check 4, - // but the likelihood of both the first 4 bytes and the hash matching should be enough. - t = candidateL.offset - e.cur - s += checkAt - if debugMatches { - println("long match (after short)") - } - break - } - - t = candidateS.offset - e.cur - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugAsserts && t < 0 { - panic("t<0") - } - if debugMatches { - println("short match") - } - break - } - - // No match found, move forward in input. - s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) - if s >= sLimit { - break encodeLoop - } - cv = load6432(src, s) - } - - // A 4-byte match has been found. Update recent offsets. - // We'll later see if more than 4 bytes. - offset2 = offset1 - offset1 = s - t - - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - - if debugAsserts && canRepeat && int(offset1) > len(src) { - panic("invalid offset") - } - - // Extend the 4-byte match as long as possible. - l := e.matchlen(s+4, t+4, src) + 4 - - // Extend backwards - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { - s-- - t-- - l++ - } - - // Write our sequence - var seq seq - seq.litLen = uint32(s - nextEmit) - seq.matchLen = uint32(l - zstdMinMatch) - if seq.litLen > 0 { - blk.literals = append(blk.literals, src[nextEmit:s]...) - } - seq.offset = uint32(s-t) + 3 - s += l - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - nextEmit = s - if s >= sLimit { - break encodeLoop - } - - // Index match start+1 (long) and start+2 (short) - index0 := s - l + 1 - // Index match end-2 (long) and end-1 (short) - index1 := s - 2 - - cv0 := load6432(src, index0) - cv1 := load6432(src, index1) - te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} - te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} - longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen) - longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen) - e.longTable[longHash1] = te0 - e.longTable[longHash2] = te1 - e.markLongShardDirty(longHash1) - e.markLongShardDirty(longHash2) - cv0 >>= 8 - cv1 >>= 8 - te0.offset++ - te1.offset++ - te0.val = uint32(cv0) - te1.val = uint32(cv1) - hashVal1 := hashLen(cv0, dFastShortTableBits, dFastShortLen) - hashVal2 := hashLen(cv1, dFastShortTableBits, dFastShortLen) - e.table[hashVal1] = te0 - e.markShardDirty(hashVal1) - e.table[hashVal2] = te1 - e.markShardDirty(hashVal2) - - cv = load6432(src, s) - - if !canRepeat { - continue - } - - // Check offset 2 - for { - o2 := s - offset2 - if load3232(src, o2) != uint32(cv) { - // Do regular search - break - } - - // Store this, since we have it. - nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) - nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) - - // We have at least 4 byte match. - // No need to check backwards. We come straight from a match - l := 4 + e.matchlen(s+4, o2+4, src) - - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} - e.longTable[nextHashL] = entry - e.markLongShardDirty(nextHashL) - e.table[nextHashS] = entry - e.markShardDirty(nextHashS) - seq.matchLen = uint32(l) - zstdMinMatch - seq.litLen = 0 - - // Since litlen is always 0, this is offset 1. - seq.offset = 1 - s += l - nextEmit = s - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - - // Swap offset 1 and 2. - offset1, offset2 = offset2, offset1 - if s >= sLimit { - // Finished - break encodeLoop - } - cv = load6432(src, s) - } - } - - if int(nextEmit) < len(src) { - blk.literals = append(blk.literals, src[nextEmit:]...) - blk.extraLits = len(src) - int(nextEmit) - } - blk.recentOffsets[0] = uint32(offset1) - blk.recentOffsets[1] = uint32(offset2) - if debugEncoder { - println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) - } - // If we encoded more than 64K mark all dirty. - if len(src) > 64<<10 { - e.markAllShardsDirty() - } -} - -// ResetDict will reset and set a dictionary if not nil -func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) { - e.fastEncoder.Reset(d, singleBlock) - if d != nil { - panic("doubleFastEncoder: Reset with dict not supported") - } -} - -// ResetDict will reset and set a dictionary if not nil -func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) { - allDirty := e.allDirty - e.fastEncoderDict.Reset(d, singleBlock) - if d == nil { - return - } - - // Init or copy dict table - if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID { - if len(e.dictLongTable) != len(e.longTable) { - e.dictLongTable = make([]tableEntry, len(e.longTable)) - } - if len(d.content) >= 8 { - cv := load6432(d.content, 0) - e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{ - val: uint32(cv), - offset: e.maxMatchOff, - } - end := int32(len(d.content)) - 8 + e.maxMatchOff - for i := e.maxMatchOff + 1; i < end; i++ { - cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56) - e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{ - val: uint32(cv), - offset: i, - } - } - } - e.lastDictID = d.id - allDirty = true - } - // Reset table to initial state - e.cur = e.maxMatchOff - - dirtyShardCnt := 0 - if !allDirty { - for i := range e.longTableShardDirty { - if e.longTableShardDirty[i] { - dirtyShardCnt++ - } - } - } - - if allDirty || dirtyShardCnt > dLongTableShardCnt/2 { - //copy(e.longTable[:], e.dictLongTable) - e.longTable = *(*[dFastLongTableSize]tableEntry)(e.dictLongTable) - for i := range e.longTableShardDirty { - e.longTableShardDirty[i] = false - } - return - } - for i := range e.longTableShardDirty { - if !e.longTableShardDirty[i] { - continue - } - - // copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize]) - *(*[dLongTableShardSize]tableEntry)(e.longTable[i*dLongTableShardSize:]) = *(*[dLongTableShardSize]tableEntry)(e.dictLongTable[i*dLongTableShardSize:]) - - e.longTableShardDirty[i] = false - } -} - -func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) { - e.longTableShardDirty[entryNum/dLongTableShardSize] = true -} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go deleted file mode 100644 index f45a3da7d..000000000 --- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ /dev/null @@ -1,891 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "fmt" -) - -const ( - tableBits = 15 // Bits used in the table - tableSize = 1 << tableBits // Size of the table - tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table - tableShardSize = tableSize / tableShardCnt // Size of an individual shard - tableFastHashLen = 6 - tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. - maxMatchLength = 131074 -) - -type tableEntry struct { - val uint32 - offset int32 -} - -type fastEncoder struct { - fastBase - table [tableSize]tableEntry -} - -type fastEncoderDict struct { - fastEncoder - dictTable []tableEntry - tableShardDirty [tableShardCnt]bool - allDirty bool -} - -// Encode mimmics functionality in zstd_fast.c -func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { - const ( - inputMargin = 8 - minNonLiteralBlockSize = 1 + 1 + inputMargin - ) - - // Protect against e.cur wraparound. - for e.cur >= e.bufferReset-int32(len(e.hist)) { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - e.cur = e.maxMatchOff - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff - for i := range e.table[:] { - v := e.table[i].offset - if v < minOff { - v = 0 - } else { - v = v - e.cur + e.maxMatchOff - } - e.table[i].offset = v - } - e.cur = e.maxMatchOff - break - } - - s := e.addBlock(src) - blk.size = len(src) - if len(src) < minNonLiteralBlockSize { - blk.extraLits = len(src) - blk.literals = blk.literals[:len(src)] - copy(blk.literals, src) - return - } - - // Override src - src = e.hist - sLimit := int32(len(src)) - inputMargin - // stepSize is the number of bytes to skip on every main loop iteration. - // It should be >= 2. - const stepSize = 2 - - // TEMPLATE - const hashLog = tableBits - // seems global, but would be nice to tweak. - const kSearchStrength = 6 - - // nextEmit is where in src the next emitLiteral should start from. - nextEmit := s - cv := load6432(src, s) - - // Relative offsets - offset1 := int32(blk.recentOffsets[0]) - offset2 := int32(blk.recentOffsets[1]) - - addLiterals := func(s *seq, until int32) { - if until == nextEmit { - return - } - blk.literals = append(blk.literals, src[nextEmit:until]...) - s.litLen = uint32(until - nextEmit) - } - if debugEncoder { - println("recent offsets:", blk.recentOffsets) - } - -encodeLoop: - for { - // t will contain the match offset when we find one. - // When existing the search loop, we have already checked 4 bytes. - var t int32 - - // We will not use repeat offsets across blocks. - // By not using them for the first 3 matches - canRepeat := len(blk.sequences) > 2 - - for { - if debugAsserts && canRepeat && offset1 == 0 { - panic("offset0 was 0") - } - - nextHash := hashLen(cv, hashLog, tableFastHashLen) - nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen) - candidate := e.table[nextHash] - candidate2 := e.table[nextHash2] - repIndex := s - offset1 + 2 - - e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} - e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)} - - if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { - // Consider history as well. - var seq seq - length := 4 + e.matchlen(s+6, repIndex+4, src) - seq.matchLen = uint32(length - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + 2 - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - sMin := s - e.maxMatchOff - if sMin < 0 { - sMin = 0 - } - for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 0 - seq.offset = 1 - if debugSequences { - println("repeat sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - s += length + 2 - nextEmit = s - if s >= sLimit { - if debugEncoder { - println("repeat ended", s, length) - - } - break encodeLoop - } - cv = load6432(src, s) - continue - } - coffset0 := s - (candidate.offset - e.cur) - coffset1 := s - (candidate2.offset - e.cur) + 1 - if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val { - // found a regular match - t = candidate.offset - e.cur - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - break - } - - if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val { - // found a regular match - t = candidate2.offset - e.cur - s++ - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugAsserts && t < 0 { - panic("t<0") - } - break - } - s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) - if s >= sLimit { - break encodeLoop - } - cv = load6432(src, s) - } - // A 4-byte match has been found. We'll later see if more than 4 bytes. - offset2 = offset1 - offset1 = s - t - - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - - if debugAsserts && canRepeat && int(offset1) > len(src) { - panic("invalid offset") - } - - // Extend the 4-byte match as long as possible. - l := e.matchlen(s+4, t+4, src) + 4 - - // Extend backwards - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { - s-- - t-- - l++ - } - - // Write our sequence. - var seq seq - seq.litLen = uint32(s - nextEmit) - seq.matchLen = uint32(l - zstdMinMatch) - if seq.litLen > 0 { - blk.literals = append(blk.literals, src[nextEmit:s]...) - } - // Don't use repeat offsets - seq.offset = uint32(s-t) + 3 - s += l - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - nextEmit = s - if s >= sLimit { - break encodeLoop - } - cv = load6432(src, s) - - // Check offset 2 - if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { - // We have at least 4 byte match. - // No need to check backwards. We come straight from a match - l := 4 + e.matchlen(s+4, o2+4, src) - - // Store this, since we have it. - nextHash := hashLen(cv, hashLog, tableFastHashLen) - e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} - seq.matchLen = uint32(l) - zstdMinMatch - seq.litLen = 0 - // Since litlen is always 0, this is offset 1. - seq.offset = 1 - s += l - nextEmit = s - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - - // Swap offset 1 and 2. - offset1, offset2 = offset2, offset1 - if s >= sLimit { - break encodeLoop - } - // Prepare next loop. - cv = load6432(src, s) - } - } - - if int(nextEmit) < len(src) { - blk.literals = append(blk.literals, src[nextEmit:]...) - blk.extraLits = len(src) - int(nextEmit) - } - blk.recentOffsets[0] = uint32(offset1) - blk.recentOffsets[1] = uint32(offset2) - if debugEncoder { - println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) - } -} - -// EncodeNoHist will encode a block with no history and no following blocks. -// Most notable difference is that src will not be copied for history and -// we do not need to check for max match length. -func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { - const ( - inputMargin = 8 - minNonLiteralBlockSize = 1 + 1 + inputMargin - ) - if debugEncoder { - if len(src) > maxCompressedBlockSize { - panic("src too big") - } - } - - // Protect against e.cur wraparound. - if e.cur >= e.bufferReset { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - e.cur = e.maxMatchOff - } - - s := int32(0) - blk.size = len(src) - if len(src) < minNonLiteralBlockSize { - blk.extraLits = len(src) - blk.literals = blk.literals[:len(src)] - copy(blk.literals, src) - return - } - - sLimit := int32(len(src)) - inputMargin - // stepSize is the number of bytes to skip on every main loop iteration. - // It should be >= 2. - const stepSize = 2 - - // TEMPLATE - const hashLog = tableBits - // seems global, but would be nice to tweak. - const kSearchStrength = 6 - - // nextEmit is where in src the next emitLiteral should start from. - nextEmit := s - cv := load6432(src, s) - - // Relative offsets - offset1 := int32(blk.recentOffsets[0]) - offset2 := int32(blk.recentOffsets[1]) - - addLiterals := func(s *seq, until int32) { - if until == nextEmit { - return - } - blk.literals = append(blk.literals, src[nextEmit:until]...) - s.litLen = uint32(until - nextEmit) - } - if debugEncoder { - println("recent offsets:", blk.recentOffsets) - } - -encodeLoop: - for { - // t will contain the match offset when we find one. - // When existing the search loop, we have already checked 4 bytes. - var t int32 - - // We will not use repeat offsets across blocks. - // By not using them for the first 3 matches - - for { - nextHash := hashLen(cv, hashLog, tableFastHashLen) - nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen) - candidate := e.table[nextHash] - candidate2 := e.table[nextHash2] - repIndex := s - offset1 + 2 - - e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} - e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)} - - if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) { - // Consider history as well. - var seq seq - length := 4 + e.matchlen(s+6, repIndex+4, src) - - seq.matchLen = uint32(length - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + 2 - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - sMin := s - e.maxMatchOff - if sMin < 0 { - sMin = 0 - } - for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 0 - seq.offset = 1 - if debugSequences { - println("repeat sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - s += length + 2 - nextEmit = s - if s >= sLimit { - if debugEncoder { - println("repeat ended", s, length) - - } - break encodeLoop - } - cv = load6432(src, s) - continue - } - coffset0 := s - (candidate.offset - e.cur) - coffset1 := s - (candidate2.offset - e.cur) + 1 - if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val { - // found a regular match - t = candidate.offset - e.cur - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugAsserts && t < 0 { - panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff)) - } - break - } - - if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val { - // found a regular match - t = candidate2.offset - e.cur - s++ - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugAsserts && t < 0 { - panic("t<0") - } - break - } - s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) - if s >= sLimit { - break encodeLoop - } - cv = load6432(src, s) - } - // A 4-byte match has been found. We'll later see if more than 4 bytes. - offset2 = offset1 - offset1 = s - t - - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - - if debugAsserts && t < 0 { - panic(fmt.Sprintf("t (%d) < 0 ", t)) - } - // Extend the 4-byte match as long as possible. - l := e.matchlen(s+4, t+4, src) + 4 - - // Extend backwards - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for t > tMin && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - - // Write our sequence. - var seq seq - seq.litLen = uint32(s - nextEmit) - seq.matchLen = uint32(l - zstdMinMatch) - if seq.litLen > 0 { - blk.literals = append(blk.literals, src[nextEmit:s]...) - } - // Don't use repeat offsets - seq.offset = uint32(s-t) + 3 - s += l - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - nextEmit = s - if s >= sLimit { - break encodeLoop - } - cv = load6432(src, s) - - // Check offset 2 - if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) { - // We have at least 4 byte match. - // No need to check backwards. We come straight from a match - l := 4 + e.matchlen(s+4, o2+4, src) - - // Store this, since we have it. - nextHash := hashLen(cv, hashLog, tableFastHashLen) - e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} - seq.matchLen = uint32(l) - zstdMinMatch - seq.litLen = 0 - // Since litlen is always 0, this is offset 1. - seq.offset = 1 - s += l - nextEmit = s - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - - // Swap offset 1 and 2. - offset1, offset2 = offset2, offset1 - if s >= sLimit { - break encodeLoop - } - // Prepare next loop. - cv = load6432(src, s) - } - } - - if int(nextEmit) < len(src) { - blk.literals = append(blk.literals, src[nextEmit:]...) - blk.extraLits = len(src) - int(nextEmit) - } - if debugEncoder { - println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) - } - // We do not store history, so we must offset e.cur to avoid false matches for next user. - if e.cur < e.bufferReset { - e.cur += int32(len(src)) - } -} - -// Encode will encode the content, with a dictionary if initialized for it. -func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) { - const ( - inputMargin = 8 - minNonLiteralBlockSize = 1 + 1 + inputMargin - ) - if e.allDirty || len(src) > 32<<10 { - e.fastEncoder.Encode(blk, src) - e.allDirty = true - return - } - // Protect against e.cur wraparound. - for e.cur >= e.bufferReset-int32(len(e.hist)) { - if len(e.hist) == 0 { - e.table = [tableSize]tableEntry{} - e.cur = e.maxMatchOff - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff - for i := range e.table[:] { - v := e.table[i].offset - if v < minOff { - v = 0 - } else { - v = v - e.cur + e.maxMatchOff - } - e.table[i].offset = v - } - e.cur = e.maxMatchOff - break - } - - s := e.addBlock(src) - blk.size = len(src) - if len(src) < minNonLiteralBlockSize { - blk.extraLits = len(src) - blk.literals = blk.literals[:len(src)] - copy(blk.literals, src) - return - } - - // Override src - src = e.hist - sLimit := int32(len(src)) - inputMargin - // stepSize is the number of bytes to skip on every main loop iteration. - // It should be >= 2. - const stepSize = 2 - - // TEMPLATE - const hashLog = tableBits - // seems global, but would be nice to tweak. - const kSearchStrength = 7 - - // nextEmit is where in src the next emitLiteral should start from. - nextEmit := s - cv := load6432(src, s) - - // Relative offsets - offset1 := int32(blk.recentOffsets[0]) - offset2 := int32(blk.recentOffsets[1]) - - addLiterals := func(s *seq, until int32) { - if until == nextEmit { - return - } - blk.literals = append(blk.literals, src[nextEmit:until]...) - s.litLen = uint32(until - nextEmit) - } - if debugEncoder { - println("recent offsets:", blk.recentOffsets) - } - -encodeLoop: - for { - // t will contain the match offset when we find one. - // When existing the search loop, we have already checked 4 bytes. - var t int32 - - // We will not use repeat offsets across blocks. - // By not using them for the first 3 matches - canRepeat := len(blk.sequences) > 2 - - for { - if debugAsserts && canRepeat && offset1 == 0 { - panic("offset0 was 0") - } - - nextHash := hashLen(cv, hashLog, tableFastHashLen) - nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen) - candidate := e.table[nextHash] - candidate2 := e.table[nextHash2] - repIndex := s - offset1 + 2 - - e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} - e.markShardDirty(nextHash) - e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)} - e.markShardDirty(nextHash2) - - if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { - // Consider history as well. - var seq seq - length := 4 + e.matchlen(s+6, repIndex+4, src) - - seq.matchLen = uint32(length - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + 2 - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - sMin := s - e.maxMatchOff - if sMin < 0 { - sMin = 0 - } - for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 0 - seq.offset = 1 - if debugSequences { - println("repeat sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - s += length + 2 - nextEmit = s - if s >= sLimit { - if debugEncoder { - println("repeat ended", s, length) - - } - break encodeLoop - } - cv = load6432(src, s) - continue - } - coffset0 := s - (candidate.offset - e.cur) - coffset1 := s - (candidate2.offset - e.cur) + 1 - if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val { - // found a regular match - t = candidate.offset - e.cur - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - break - } - - if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val { - // found a regular match - t = candidate2.offset - e.cur - s++ - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - if debugAsserts && s-t > e.maxMatchOff { - panic("s - t >e.maxMatchOff") - } - if debugAsserts && t < 0 { - panic("t<0") - } - break - } - s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) - if s >= sLimit { - break encodeLoop - } - cv = load6432(src, s) - } - // A 4-byte match has been found. We'll later see if more than 4 bytes. - offset2 = offset1 - offset1 = s - t - - if debugAsserts && s <= t { - panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) - } - - if debugAsserts && canRepeat && int(offset1) > len(src) { - panic("invalid offset") - } - - // Extend the 4-byte match as long as possible. - l := e.matchlen(s+4, t+4, src) + 4 - - // Extend backwards - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { - s-- - t-- - l++ - } - - // Write our sequence. - var seq seq - seq.litLen = uint32(s - nextEmit) - seq.matchLen = uint32(l - zstdMinMatch) - if seq.litLen > 0 { - blk.literals = append(blk.literals, src[nextEmit:s]...) - } - // Don't use repeat offsets - seq.offset = uint32(s-t) + 3 - s += l - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - nextEmit = s - if s >= sLimit { - break encodeLoop - } - cv = load6432(src, s) - - // Check offset 2 - if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { - // We have at least 4 byte match. - // No need to check backwards. We come straight from a match - l := 4 + e.matchlen(s+4, o2+4, src) - - // Store this, since we have it. - nextHash := hashLen(cv, hashLog, tableFastHashLen) - e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} - e.markShardDirty(nextHash) - seq.matchLen = uint32(l) - zstdMinMatch - seq.litLen = 0 - // Since litlen is always 0, this is offset 1. - seq.offset = 1 - s += l - nextEmit = s - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - - // Swap offset 1 and 2. - offset1, offset2 = offset2, offset1 - if s >= sLimit { - break encodeLoop - } - // Prepare next loop. - cv = load6432(src, s) - } - } - - if int(nextEmit) < len(src) { - blk.literals = append(blk.literals, src[nextEmit:]...) - blk.extraLits = len(src) - int(nextEmit) - } - blk.recentOffsets[0] = uint32(offset1) - blk.recentOffsets[1] = uint32(offset2) - if debugEncoder { - println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) - } -} - -// ResetDict will reset and set a dictionary if not nil -func (e *fastEncoder) Reset(d *dict, singleBlock bool) { - e.resetBase(d, singleBlock) - if d != nil { - panic("fastEncoder: Reset with dict") - } -} - -// ResetDict will reset and set a dictionary if not nil -func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) { - e.resetBase(d, singleBlock) - if d == nil { - return - } - - // Init or copy dict table - if len(e.dictTable) != len(e.table) || d.id != e.lastDictID { - if len(e.dictTable) != len(e.table) { - e.dictTable = make([]tableEntry, len(e.table)) - } - if true { - end := e.maxMatchOff + int32(len(d.content)) - 8 - for i := e.maxMatchOff; i < end; i += 2 { - const hashLog = tableBits - - cv := load6432(d.content, i-e.maxMatchOff) - nextHash := hashLen(cv, hashLog, tableFastHashLen) // 0 -> 6 - nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 7 - e.dictTable[nextHash] = tableEntry{ - val: uint32(cv), - offset: i, - } - e.dictTable[nextHash1] = tableEntry{ - val: uint32(cv >> 8), - offset: i + 1, - } - } - } - e.lastDictID = d.id - e.allDirty = true - } - - e.cur = e.maxMatchOff - dirtyShardCnt := 0 - if !e.allDirty { - for i := range e.tableShardDirty { - if e.tableShardDirty[i] { - dirtyShardCnt++ - } - } - } - - const shardCnt = tableShardCnt - const shardSize = tableShardSize - if e.allDirty || dirtyShardCnt > shardCnt*4/6 { - //copy(e.table[:], e.dictTable) - e.table = *(*[tableSize]tableEntry)(e.dictTable) - for i := range e.tableShardDirty { - e.tableShardDirty[i] = false - } - e.allDirty = false - return - } - for i := range e.tableShardDirty { - if !e.tableShardDirty[i] { - continue - } - - //copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize]) - *(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:]) - e.tableShardDirty[i] = false - } - e.allDirty = false -} - -func (e *fastEncoderDict) markAllShardsDirty() { - e.allDirty = true -} - -func (e *fastEncoderDict) markShardDirty(entryNum uint32) { - e.tableShardDirty[entryNum/tableShardSize] = true -} diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go deleted file mode 100644 index 8f8223cd3..000000000 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ /dev/null @@ -1,642 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "crypto/rand" - "errors" - "fmt" - "io" - "math" - rdebug "runtime/debug" - "sync" - - "github.com/klauspost/compress/zstd/internal/xxhash" -) - -// Encoder provides encoding to Zstandard. -// An Encoder can be used for either compressing a stream via the -// io.WriteCloser interface supported by the Encoder or as multiple independent -// tasks via the EncodeAll function. -// Smaller encodes are encouraged to use the EncodeAll function. -// Use NewWriter to create a new instance. -type Encoder struct { - o encoderOptions - encoders chan encoder - state encoderState - init sync.Once -} - -type encoder interface { - Encode(blk *blockEnc, src []byte) - EncodeNoHist(blk *blockEnc, src []byte) - Block() *blockEnc - CRC() *xxhash.Digest - AppendCRC([]byte) []byte - WindowSize(size int64) int32 - UseBlock(*blockEnc) - Reset(d *dict, singleBlock bool) -} - -type encoderState struct { - w io.Writer - filling []byte - current []byte - previous []byte - encoder encoder - writing *blockEnc - err error - writeErr error - nWritten int64 - nInput int64 - frameContentSize int64 - headerWritten bool - eofWritten bool - fullFrameWritten bool - - // This waitgroup indicates an encode is running. - wg sync.WaitGroup - // This waitgroup indicates we have a block encoding/writing. - wWg sync.WaitGroup -} - -// NewWriter will create a new Zstandard encoder. -// If the encoder will be used for encoding blocks a nil writer can be used. -func NewWriter(w io.Writer, opts ...EOption) (*Encoder, error) { - initPredefined() - var e Encoder - e.o.setDefault() - for _, o := range opts { - err := o(&e.o) - if err != nil { - return nil, err - } - } - if w != nil { - e.Reset(w) - } - return &e, nil -} - -func (e *Encoder) initialize() { - if e.o.concurrent == 0 { - e.o.setDefault() - } - e.encoders = make(chan encoder, e.o.concurrent) - for i := 0; i < e.o.concurrent; i++ { - enc := e.o.encoder() - e.encoders <- enc - } -} - -// Reset will re-initialize the writer and new writes will encode to the supplied writer -// as a new, independent stream. -func (e *Encoder) Reset(w io.Writer) { - s := &e.state - s.wg.Wait() - s.wWg.Wait() - if cap(s.filling) == 0 { - s.filling = make([]byte, 0, e.o.blockSize) - } - if e.o.concurrent > 1 { - if cap(s.current) == 0 { - s.current = make([]byte, 0, e.o.blockSize) - } - if cap(s.previous) == 0 { - s.previous = make([]byte, 0, e.o.blockSize) - } - s.current = s.current[:0] - s.previous = s.previous[:0] - if s.writing == nil { - s.writing = &blockEnc{lowMem: e.o.lowMem} - s.writing.init() - } - s.writing.initNewEncode() - } - if s.encoder == nil { - s.encoder = e.o.encoder() - } - s.filling = s.filling[:0] - s.encoder.Reset(e.o.dict, false) - s.headerWritten = false - s.eofWritten = false - s.fullFrameWritten = false - s.w = w - s.err = nil - s.nWritten = 0 - s.nInput = 0 - s.writeErr = nil - s.frameContentSize = 0 -} - -// ResetContentSize will reset and set a content size for the next stream. -// If the bytes written does not match the size given an error will be returned -// when calling Close(). -// This is removed when Reset is called. -// Sizes <= 0 results in no content size set. -func (e *Encoder) ResetContentSize(w io.Writer, size int64) { - e.Reset(w) - if size >= 0 { - e.state.frameContentSize = size - } -} - -// Write data to the encoder. -// Input data will be buffered and as the buffer fills up -// content will be compressed and written to the output. -// When done writing, use Close to flush the remaining output -// and write CRC if requested. -func (e *Encoder) Write(p []byte) (n int, err error) { - s := &e.state - if s.eofWritten { - return 0, ErrEncoderClosed - } - for len(p) > 0 { - if len(p)+len(s.filling) < e.o.blockSize { - if e.o.crc { - _, _ = s.encoder.CRC().Write(p) - } - s.filling = append(s.filling, p...) - return n + len(p), nil - } - add := p - if len(p)+len(s.filling) > e.o.blockSize { - add = add[:e.o.blockSize-len(s.filling)] - } - if e.o.crc { - _, _ = s.encoder.CRC().Write(add) - } - s.filling = append(s.filling, add...) - p = p[len(add):] - n += len(add) - if len(s.filling) < e.o.blockSize { - return n, nil - } - err := e.nextBlock(false) - if err != nil { - return n, err - } - if debugAsserts && len(s.filling) > 0 { - panic(len(s.filling)) - } - } - return n, nil -} - -// nextBlock will synchronize and start compressing input in e.state.filling. -// If an error has occurred during encoding it will be returned. -func (e *Encoder) nextBlock(final bool) error { - s := &e.state - // Wait for current block. - s.wg.Wait() - if s.err != nil { - return s.err - } - if len(s.filling) > e.o.blockSize { - return fmt.Errorf("block > maxStoreBlockSize") - } - if !s.headerWritten { - // If we have a single block encode, do a sync compression. - if final && len(s.filling) == 0 && !e.o.fullZero { - s.headerWritten = true - s.fullFrameWritten = true - s.eofWritten = true - return nil - } - if final && len(s.filling) > 0 { - s.current = e.encodeAll(s.encoder, s.filling, s.current[:0]) - var n2 int - n2, s.err = s.w.Write(s.current) - if s.err != nil { - return s.err - } - s.nWritten += int64(n2) - s.nInput += int64(len(s.filling)) - s.current = s.current[:0] - s.filling = s.filling[:0] - s.headerWritten = true - s.fullFrameWritten = true - s.eofWritten = true - return nil - } - - var tmp [maxHeaderSize]byte - fh := frameHeader{ - ContentSize: uint64(s.frameContentSize), - WindowSize: uint32(s.encoder.WindowSize(s.frameContentSize)), - SingleSegment: false, - Checksum: e.o.crc, - DictID: e.o.dict.ID(), - } - - dst := fh.appendTo(tmp[:0]) - s.headerWritten = true - s.wWg.Wait() - var n2 int - n2, s.err = s.w.Write(dst) - if s.err != nil { - return s.err - } - s.nWritten += int64(n2) - } - if s.eofWritten { - // Ensure we only write it once. - final = false - } - - if len(s.filling) == 0 { - // Final block, but no data. - if final { - enc := s.encoder - blk := enc.Block() - blk.reset(nil) - blk.last = true - blk.encodeRaw(nil) - s.wWg.Wait() - _, s.err = s.w.Write(blk.output) - s.nWritten += int64(len(blk.output)) - s.eofWritten = true - } - return s.err - } - - // SYNC: - if e.o.concurrent == 1 { - src := s.filling - s.nInput += int64(len(s.filling)) - if debugEncoder { - println("Adding sync block,", len(src), "bytes, final:", final) - } - enc := s.encoder - blk := enc.Block() - blk.reset(nil) - enc.Encode(blk, src) - blk.last = final - if final { - s.eofWritten = true - } - - s.err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) - if s.err != nil { - return s.err - } - _, s.err = s.w.Write(blk.output) - s.nWritten += int64(len(blk.output)) - s.filling = s.filling[:0] - return s.err - } - - // Move blocks forward. - s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current - s.nInput += int64(len(s.current)) - s.wg.Add(1) - if final { - s.eofWritten = true - } - go func(src []byte) { - if debugEncoder { - println("Adding block,", len(src), "bytes, final:", final) - } - defer func() { - if r := recover(); r != nil { - s.err = fmt.Errorf("panic while encoding: %v", r) - rdebug.PrintStack() - } - s.wg.Done() - }() - enc := s.encoder - blk := enc.Block() - enc.Encode(blk, src) - blk.last = final - // Wait for pending writes. - s.wWg.Wait() - if s.writeErr != nil { - s.err = s.writeErr - return - } - // Transfer encoders from previous write block. - blk.swapEncoders(s.writing) - // Transfer recent offsets to next. - enc.UseBlock(s.writing) - s.writing = blk - s.wWg.Add(1) - go func() { - defer func() { - if r := recover(); r != nil { - s.writeErr = fmt.Errorf("panic while encoding/writing: %v", r) - rdebug.PrintStack() - } - s.wWg.Done() - }() - s.writeErr = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) - if s.writeErr != nil { - return - } - _, s.writeErr = s.w.Write(blk.output) - s.nWritten += int64(len(blk.output)) - }() - }(s.current) - return nil -} - -// ReadFrom reads data from r until EOF or error. -// The return value n is the number of bytes read. -// Any error except io.EOF encountered during the read is also returned. -// -// The Copy function uses ReaderFrom if available. -func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) { - if debugEncoder { - println("Using ReadFrom") - } - - // Flush any current writes. - if len(e.state.filling) > 0 { - if err := e.nextBlock(false); err != nil { - return 0, err - } - } - e.state.filling = e.state.filling[:e.o.blockSize] - src := e.state.filling - for { - n2, err := r.Read(src) - if e.o.crc { - _, _ = e.state.encoder.CRC().Write(src[:n2]) - } - // src is now the unfilled part... - src = src[n2:] - n += int64(n2) - switch err { - case io.EOF: - e.state.filling = e.state.filling[:len(e.state.filling)-len(src)] - if debugEncoder { - println("ReadFrom: got EOF final block:", len(e.state.filling)) - } - return n, nil - case nil: - default: - if debugEncoder { - println("ReadFrom: got error:", err) - } - e.state.err = err - return n, err - } - if len(src) > 0 { - if debugEncoder { - println("ReadFrom: got space left in source:", len(src)) - } - continue - } - err = e.nextBlock(false) - if err != nil { - return n, err - } - e.state.filling = e.state.filling[:e.o.blockSize] - src = e.state.filling - } -} - -// Flush will send the currently written data to output -// and block until everything has been written. -// This should only be used on rare occasions where pushing the currently queued data is critical. -func (e *Encoder) Flush() error { - s := &e.state - if len(s.filling) > 0 { - err := e.nextBlock(false) - if err != nil { - // Ignore Flush after Close. - if errors.Is(s.err, ErrEncoderClosed) { - return nil - } - return err - } - } - s.wg.Wait() - s.wWg.Wait() - if s.err != nil { - // Ignore Flush after Close. - if errors.Is(s.err, ErrEncoderClosed) { - return nil - } - return s.err - } - return s.writeErr -} - -// Close will flush the final output and close the stream. -// The function will block until everything has been written. -// The Encoder can still be re-used after calling this. -func (e *Encoder) Close() error { - s := &e.state - if s.encoder == nil { - return nil - } - err := e.nextBlock(true) - if err != nil { - if errors.Is(s.err, ErrEncoderClosed) { - return nil - } - return err - } - if s.frameContentSize > 0 { - if s.nInput != s.frameContentSize { - return fmt.Errorf("frame content size %d given, but %d bytes was written", s.frameContentSize, s.nInput) - } - } - if e.state.fullFrameWritten { - return s.err - } - s.wg.Wait() - s.wWg.Wait() - - if s.err != nil { - return s.err - } - if s.writeErr != nil { - return s.writeErr - } - - // Write CRC - if e.o.crc && s.err == nil { - // heap alloc. - var tmp [4]byte - _, s.err = s.w.Write(s.encoder.AppendCRC(tmp[:0])) - s.nWritten += 4 - } - - // Add padding with content from crypto/rand.Reader - if s.err == nil && e.o.pad > 0 { - add := calcSkippableFrame(s.nWritten, int64(e.o.pad)) - frame, err := skippableFrame(s.filling[:0], add, rand.Reader) - if err != nil { - return err - } - _, s.err = s.w.Write(frame) - } - if s.err == nil { - s.err = ErrEncoderClosed - return nil - } - - return s.err -} - -// EncodeAll will encode all input in src and append it to dst. -// This function can be called concurrently, but each call will only run on a single goroutine. -// If empty input is given, nothing is returned, unless WithZeroFrames is specified. -// Encoded blocks can be concatenated and the result will be the combined input stream. -// Data compressed with EncodeAll can be decoded with the Decoder, -// using either a stream or DecodeAll. -func (e *Encoder) EncodeAll(src, dst []byte) []byte { - e.init.Do(e.initialize) - enc := <-e.encoders - defer func() { - e.encoders <- enc - }() - return e.encodeAll(enc, src, dst) -} - -func (e *Encoder) encodeAll(enc encoder, src, dst []byte) []byte { - if len(src) == 0 { - if e.o.fullZero { - // Add frame header. - fh := frameHeader{ - ContentSize: 0, - WindowSize: MinWindowSize, - SingleSegment: true, - // Adding a checksum would be a waste of space. - Checksum: false, - DictID: 0, - } - dst = fh.appendTo(dst) - - // Write raw block as last one only. - var blk blockHeader - blk.setSize(0) - blk.setType(blockTypeRaw) - blk.setLast(true) - dst = blk.appendTo(dst) - } - return dst - } - - // Use single segments when above minimum window and below window size. - single := len(src) <= e.o.windowSize && len(src) > MinWindowSize - if e.o.single != nil { - single = *e.o.single - } - fh := frameHeader{ - ContentSize: uint64(len(src)), - WindowSize: uint32(enc.WindowSize(int64(len(src)))), - SingleSegment: single, - Checksum: e.o.crc, - DictID: e.o.dict.ID(), - } - - // If less than 1MB, allocate a buffer up front. - if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem { - dst = make([]byte, 0, len(src)) - } - dst = fh.appendTo(dst) - - // If we can do everything in one block, prefer that. - if len(src) <= e.o.blockSize { - enc.Reset(e.o.dict, true) - // Slightly faster with no history and everything in one block. - if e.o.crc { - _, _ = enc.CRC().Write(src) - } - blk := enc.Block() - blk.last = true - if e.o.dict == nil { - enc.EncodeNoHist(blk, src) - } else { - enc.Encode(blk, src) - } - - // If we got the exact same number of literals as input, - // assume the literals cannot be compressed. - oldout := blk.output - // Output directly to dst - blk.output = dst - - err := blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) - if err != nil { - panic(err) - } - dst = blk.output - blk.output = oldout - } else { - enc.Reset(e.o.dict, false) - blk := enc.Block() - for len(src) > 0 { - todo := src - if len(todo) > e.o.blockSize { - todo = todo[:e.o.blockSize] - } - src = src[len(todo):] - if e.o.crc { - _, _ = enc.CRC().Write(todo) - } - blk.pushOffsets() - enc.Encode(blk, todo) - if len(src) == 0 { - blk.last = true - } - err := blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy) - if err != nil { - panic(err) - } - dst = append(dst, blk.output...) - blk.reset(nil) - } - } - if e.o.crc { - dst = enc.AppendCRC(dst) - } - // Add padding with content from crypto/rand.Reader - if e.o.pad > 0 { - add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad)) - var err error - dst, err = skippableFrame(dst, add, rand.Reader) - if err != nil { - panic(err) - } - } - return dst -} - -// MaxEncodedSize returns the expected maximum -// size of an encoded block or stream. -func (e *Encoder) MaxEncodedSize(size int) int { - frameHeader := 4 + 2 // magic + frame header & window descriptor - if e.o.dict != nil { - frameHeader += 4 - } - // Frame content size: - if size < 256 { - frameHeader++ - } else if size < 65536+256 { - frameHeader += 2 - } else if size < math.MaxInt32 { - frameHeader += 4 - } else { - frameHeader += 8 - } - // Final crc - if e.o.crc { - frameHeader += 4 - } - - // Max overhead is 3 bytes/block. - // There cannot be 0 blocks. - blocks := (size + e.o.blockSize) / e.o.blockSize - - // Combine, add padding. - maxSz := frameHeader + 3*blocks + size - if e.o.pad > 1 { - maxSz += calcSkippableFrame(int64(maxSz), int64(e.o.pad)) - } - return maxSz -} diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go deleted file mode 100644 index 20671dcb9..000000000 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ /dev/null @@ -1,339 +0,0 @@ -package zstd - -import ( - "errors" - "fmt" - "math" - "math/bits" - "runtime" - "strings" -) - -// EOption is an option for creating a encoder. -type EOption func(*encoderOptions) error - -// options retains accumulated state of multiple options. -type encoderOptions struct { - concurrent int - level EncoderLevel - single *bool - pad int - blockSize int - windowSize int - crc bool - fullZero bool - noEntropy bool - allLitEntropy bool - customWindow bool - customALEntropy bool - customBlockSize bool - lowMem bool - dict *dict -} - -func (o *encoderOptions) setDefault() { - *o = encoderOptions{ - concurrent: runtime.GOMAXPROCS(0), - crc: true, - single: nil, - blockSize: maxCompressedBlockSize, - windowSize: 8 << 20, - level: SpeedDefault, - allLitEntropy: false, - lowMem: false, - } -} - -// encoder returns an encoder with the selected options. -func (o encoderOptions) encoder() encoder { - switch o.level { - case SpeedFastest: - if o.dict != nil { - return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}} - } - return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}} - - case SpeedDefault: - if o.dict != nil { - return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}} - } - return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}} - case SpeedBetterCompression: - if o.dict != nil { - return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}} - } - return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}} - case SpeedBestCompression: - return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}} - } - panic("unknown compression level") -} - -// WithEncoderCRC will add CRC value to output. -// Output will be 4 bytes larger. -func WithEncoderCRC(b bool) EOption { - return func(o *encoderOptions) error { o.crc = b; return nil } -} - -// WithEncoderConcurrency will set the concurrency, -// meaning the maximum number of encoders to run concurrently. -// The value supplied must be at least 1. -// For streams, setting a value of 1 will disable async compression. -// By default this will be set to GOMAXPROCS. -func WithEncoderConcurrency(n int) EOption { - return func(o *encoderOptions) error { - if n <= 0 { - return fmt.Errorf("concurrency must be at least 1") - } - o.concurrent = n - return nil - } -} - -// WithWindowSize will set the maximum allowed back-reference distance. -// The value must be a power of two between MinWindowSize and MaxWindowSize. -// A larger value will enable better compression but allocate more memory and, -// for above-default values, take considerably longer. -// The default value is determined by the compression level and max 8MB. -func WithWindowSize(n int) EOption { - return func(o *encoderOptions) error { - switch { - case n < MinWindowSize: - return fmt.Errorf("window size must be at least %d", MinWindowSize) - case n > MaxWindowSize: - return fmt.Errorf("window size must be at most %d", MaxWindowSize) - case (n & (n - 1)) != 0: - return errors.New("window size must be a power of 2") - } - - o.windowSize = n - o.customWindow = true - if o.blockSize > o.windowSize { - o.blockSize = o.windowSize - o.customBlockSize = true - } - return nil - } -} - -// WithEncoderPadding will add padding to all output so the size will be a multiple of n. -// This can be used to obfuscate the exact output size or make blocks of a certain size. -// The contents will be a skippable frame, so it will be invisible by the decoder. -// n must be > 0 and <= 1GB, 1<<30 bytes. -// The padded area will be filled with data from crypto/rand.Reader. -// If `EncodeAll` is used with data already in the destination, the total size will be multiple of this. -func WithEncoderPadding(n int) EOption { - return func(o *encoderOptions) error { - if n <= 0 { - return fmt.Errorf("padding must be at least 1") - } - // No need to waste our time. - if n == 1 { - n = 0 - } - if n > 1<<30 { - return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ") - } - o.pad = n - return nil - } -} - -// EncoderLevel predefines encoder compression levels. -// Only use the constants made available, since the actual mapping -// of these values are very likely to change and your compression could change -// unpredictably when upgrading the library. -type EncoderLevel int - -const ( - speedNotSet EncoderLevel = iota - - // SpeedFastest will choose the fastest reasonable compression. - // This is roughly equivalent to the fastest Zstandard mode. - SpeedFastest - - // SpeedDefault is the default "pretty fast" compression option. - // This is roughly equivalent to the default Zstandard mode (level 3). - SpeedDefault - - // SpeedBetterCompression will yield better compression than the default. - // Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage. - // By using this, notice that CPU usage may go up in the future. - SpeedBetterCompression - - // SpeedBestCompression will choose the best available compression option. - // This will offer the best compression no matter the CPU cost. - SpeedBestCompression - - // speedLast should be kept as the last actual compression option. - // The is not for external usage, but is used to keep track of the valid options. - speedLast -) - -// EncoderLevelFromString will convert a string representation of an encoding level back -// to a compression level. The compare is not case sensitive. -// If the string wasn't recognized, (false, SpeedDefault) will be returned. -func EncoderLevelFromString(s string) (bool, EncoderLevel) { - for l := speedNotSet + 1; l < speedLast; l++ { - if strings.EqualFold(s, l.String()) { - return true, l - } - } - return false, SpeedDefault -} - -// EncoderLevelFromZstd will return an encoder level that closest matches the compression -// ratio of a specific zstd compression level. -// Many input values will provide the same compression level. -func EncoderLevelFromZstd(level int) EncoderLevel { - switch { - case level < 3: - return SpeedFastest - case level >= 3 && level < 6: - return SpeedDefault - case level >= 6 && level < 10: - return SpeedBetterCompression - default: - return SpeedBestCompression - } -} - -// String provides a string representation of the compression level. -func (e EncoderLevel) String() string { - switch e { - case SpeedFastest: - return "fastest" - case SpeedDefault: - return "default" - case SpeedBetterCompression: - return "better" - case SpeedBestCompression: - return "best" - default: - return "invalid" - } -} - -// WithEncoderLevel specifies a predefined compression level. -func WithEncoderLevel(l EncoderLevel) EOption { - return func(o *encoderOptions) error { - switch { - case l <= speedNotSet || l >= speedLast: - return fmt.Errorf("unknown encoder level") - } - o.level = l - if !o.customWindow { - switch o.level { - case SpeedFastest: - o.windowSize = 4 << 20 - if !o.customBlockSize { - o.blockSize = 1 << 16 - } - case SpeedDefault: - o.windowSize = 8 << 20 - case SpeedBetterCompression: - o.windowSize = 8 << 20 - case SpeedBestCompression: - o.windowSize = 8 << 20 - } - } - if !o.customALEntropy { - o.allLitEntropy = l > SpeedDefault - } - - return nil - } -} - -// WithZeroFrames will encode 0 length input as full frames. -// This can be needed for compatibility with zstandard usage, -// but is not needed for this package. -func WithZeroFrames(b bool) EOption { - return func(o *encoderOptions) error { - o.fullZero = b - return nil - } -} - -// WithAllLitEntropyCompression will apply entropy compression if no matches are found. -// Disabling this will skip incompressible data faster, but in cases with no matches but -// skewed character distribution compression is lost. -// Default value depends on the compression level selected. -func WithAllLitEntropyCompression(b bool) EOption { - return func(o *encoderOptions) error { - o.customALEntropy = true - o.allLitEntropy = b - return nil - } -} - -// WithNoEntropyCompression will always skip entropy compression of literals. -// This can be useful if content has matches, but unlikely to benefit from entropy -// compression. Usually the slight speed improvement is not worth enabling this. -func WithNoEntropyCompression(b bool) EOption { - return func(o *encoderOptions) error { - o.noEntropy = b - return nil - } -} - -// WithSingleSegment will set the "single segment" flag when EncodeAll is used. -// If this flag is set, data must be regenerated within a single continuous memory segment. -// In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present. -// As a consequence, the decoder must allocate a memory segment of size equal or larger than size of your content. -// In order to preserve the decoder from unreasonable memory requirements, -// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range. -// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB. -// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations. -// If this is not specified, block encodes will automatically choose this based on the input size and the window size. -// This setting has no effect on streamed encodes. -func WithSingleSegment(b bool) EOption { - return func(o *encoderOptions) error { - o.single = &b - return nil - } -} - -// WithLowerEncoderMem will trade in some memory cases trade less memory usage for -// slower encoding speed. -// This will not change the window size which is the primary function for reducing -// memory usage. See WithWindowSize. -func WithLowerEncoderMem(b bool) EOption { - return func(o *encoderOptions) error { - o.lowMem = b - return nil - } -} - -// WithEncoderDict allows to register a dictionary that will be used for the encode. -// -// The slice dict must be in the [dictionary format] produced by -// "zstd --train" from the Zstandard reference implementation. -// -// The encoder *may* choose to use no dictionary instead for certain payloads. -// -// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format -func WithEncoderDict(dict []byte) EOption { - return func(o *encoderOptions) error { - d, err := loadDict(dict) - if err != nil { - return err - } - o.dict = d - return nil - } -} - -// WithEncoderDictRaw registers a dictionary that may be used by the encoder. -// -// The slice content may contain arbitrary data. It will be used as an initial -// history. -func WithEncoderDictRaw(id uint32, content []byte) EOption { - return func(o *encoderOptions) error { - if bits.UintSize > 32 && uint(len(content)) > dictMaxLength { - return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content)) - } - o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}} - return nil - } -} diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go deleted file mode 100644 index e47af66e7..000000000 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ /dev/null @@ -1,415 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "encoding/binary" - "encoding/hex" - "errors" - "io" - - "github.com/klauspost/compress/zstd/internal/xxhash" -) - -type frameDec struct { - o decoderOptions - crc *xxhash.Digest - - WindowSize uint64 - - // Frame history passed between blocks - history history - - rawInput byteBuffer - - // Byte buffer that can be reused for small input blocks. - bBuf byteBuf - - FrameContentSize uint64 - - DictionaryID uint32 - HasCheckSum bool - SingleSegment bool -} - -const ( - // MinWindowSize is the minimum Window Size, which is 1 KB. - MinWindowSize = 1 << 10 - - // MaxWindowSize is the maximum encoder window size - // and the default decoder maximum window size. - MaxWindowSize = 1 << 29 -) - -const ( - frameMagic = "\x28\xb5\x2f\xfd" - skippableFrameMagic = "\x2a\x4d\x18" -) - -func newFrameDec(o decoderOptions) *frameDec { - if o.maxWindowSize > o.maxDecodedSize { - o.maxWindowSize = o.maxDecodedSize - } - d := frameDec{ - o: o, - } - return &d -} - -// reset will read the frame header and prepare for block decoding. -// If nothing can be read from the input, io.EOF will be returned. -// Any other error indicated that the stream contained data, but -// there was a problem. -func (d *frameDec) reset(br byteBuffer) error { - d.HasCheckSum = false - d.WindowSize = 0 - var signature [4]byte - for { - var err error - // Check if we can read more... - b, err := br.readSmall(1) - switch err { - case io.EOF, io.ErrUnexpectedEOF: - return io.EOF - case nil: - signature[0] = b[0] - default: - return err - } - // Read the rest, don't allow io.ErrUnexpectedEOF - b, err = br.readSmall(3) - switch err { - case io.EOF: - return io.EOF - case nil: - copy(signature[1:], b) - default: - return err - } - - if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 { - if debugDecoder { - println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString([]byte(skippableFrameMagic))) - } - // Break if not skippable frame. - break - } - // Read size to skip - b, err = br.readSmall(4) - if err != nil { - if debugDecoder { - println("Reading Frame Size", err) - } - return err - } - n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) - println("Skipping frame with", n, "bytes.") - err = br.skipN(int64(n)) - if err != nil { - if debugDecoder { - println("Reading discarded frame", err) - } - return err - } - } - if string(signature[:]) != frameMagic { - if debugDecoder { - println("Got magic numbers: ", signature, "want:", []byte(frameMagic)) - } - return ErrMagicMismatch - } - - // Read Frame_Header_Descriptor - fhd, err := br.readByte() - if err != nil { - if debugDecoder { - println("Reading Frame_Header_Descriptor", err) - } - return err - } - d.SingleSegment = fhd&(1<<5) != 0 - - if fhd&(1<<3) != 0 { - return errors.New("reserved bit set on frame header") - } - - // Read Window_Descriptor - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor - d.WindowSize = 0 - if !d.SingleSegment { - wd, err := br.readByte() - if err != nil { - if debugDecoder { - println("Reading Window_Descriptor", err) - } - return err - } - if debugDecoder { - printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) - } - windowLog := 10 + (wd >> 3) - windowBase := uint64(1) << windowLog - windowAdd := (windowBase / 8) * uint64(wd&0x7) - d.WindowSize = windowBase + windowAdd - } - - // Read Dictionary_ID - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id - d.DictionaryID = 0 - if size := fhd & 3; size != 0 { - if size == 3 { - size = 4 - } - - b, err := br.readSmall(int(size)) - if err != nil { - println("Reading Dictionary_ID", err) - return err - } - var id uint32 - switch len(b) { - case 1: - id = uint32(b[0]) - case 2: - id = uint32(b[0]) | (uint32(b[1]) << 8) - case 4: - id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) - } - if debugDecoder { - println("Dict size", size, "ID:", id) - } - d.DictionaryID = id - } - - // Read Frame_Content_Size - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size - var fcsSize int - v := fhd >> 6 - switch v { - case 0: - if d.SingleSegment { - fcsSize = 1 - } - default: - fcsSize = 1 << v - } - d.FrameContentSize = fcsUnknown - if fcsSize > 0 { - b, err := br.readSmall(fcsSize) - if err != nil { - println("Reading Frame content", err) - return err - } - switch len(b) { - case 1: - d.FrameContentSize = uint64(b[0]) - case 2: - // When FCS_Field_Size is 2, the offset of 256 is added. - d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256 - case 4: - d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24) - case 8: - d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) - d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24) - d.FrameContentSize = uint64(d1) | (uint64(d2) << 32) - } - if debugDecoder { - println("Read FCS:", d.FrameContentSize) - } - } - - // Move this to shared. - d.HasCheckSum = fhd&(1<<2) != 0 - if d.HasCheckSum { - if d.crc == nil { - d.crc = xxhash.New() - } - d.crc.Reset() - } - - if d.WindowSize > d.o.maxWindowSize { - if debugDecoder { - printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize) - } - return ErrWindowSizeExceeded - } - - if d.WindowSize == 0 && d.SingleSegment { - // We may not need window in this case. - d.WindowSize = d.FrameContentSize - if d.WindowSize < MinWindowSize { - d.WindowSize = MinWindowSize - } - if d.WindowSize > d.o.maxDecodedSize { - if debugDecoder { - printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize) - } - return ErrDecoderSizeExceeded - } - } - - // The minimum Window_Size is 1 KB. - if d.WindowSize < MinWindowSize { - if debugDecoder { - println("got window size: ", d.WindowSize) - } - return ErrWindowSizeTooSmall - } - d.history.windowSize = int(d.WindowSize) - if !d.o.lowMem || d.history.windowSize < maxBlockSize { - // Alloc 2x window size if not low-mem, or window size below 2MB. - d.history.allocFrameBuffer = d.history.windowSize * 2 - } else { - if d.o.lowMem { - // Alloc with 1MB extra. - d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize/2 - } else { - // Alloc with 2MB extra. - d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize - } - } - - if debugDecoder { - println("Frame: Dict:", d.DictionaryID, "FrameContentSize:", d.FrameContentSize, "singleseg:", d.SingleSegment, "window:", d.WindowSize, "crc:", d.HasCheckSum) - } - - // history contains input - maybe we do something - d.rawInput = br - return nil -} - -// next will start decoding the next block from stream. -func (d *frameDec) next(block *blockDec) error { - if debugDecoder { - println("decoding new block") - } - err := block.reset(d.rawInput, d.WindowSize) - if err != nil { - println("block error:", err) - // Signal the frame decoder we have a problem. - block.sendErr(err) - return err - } - return nil -} - -// checkCRC will check the checksum, assuming the frame has one. -// Will return ErrCRCMismatch if crc check failed, otherwise nil. -func (d *frameDec) checkCRC() error { - // We can overwrite upper tmp now - buf, err := d.rawInput.readSmall(4) - if err != nil { - println("CRC missing?", err) - return err - } - - want := binary.LittleEndian.Uint32(buf[:4]) - got := uint32(d.crc.Sum64()) - - if got != want { - if debugDecoder { - printf("CRC check failed: got %08x, want %08x\n", got, want) - } - return ErrCRCMismatch - } - if debugDecoder { - printf("CRC ok %08x\n", got) - } - return nil -} - -// consumeCRC skips over the checksum, assuming the frame has one. -func (d *frameDec) consumeCRC() error { - _, err := d.rawInput.readSmall(4) - if err != nil { - println("CRC missing?", err) - } - return err -} - -// runDecoder will run the decoder for the remainder of the frame. -func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) { - saved := d.history.b - - // We use the history for output to avoid copying it. - d.history.b = dst - d.history.ignoreBuffer = len(dst) - // Store input length, so we only check new data. - crcStart := len(dst) - d.history.decoders.maxSyncLen = 0 - if d.o.limitToCap { - d.history.decoders.maxSyncLen = uint64(cap(dst) - len(dst)) - } - if d.FrameContentSize != fcsUnknown { - if !d.o.limitToCap || d.FrameContentSize+uint64(len(dst)) < d.history.decoders.maxSyncLen { - d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst)) - } - if d.history.decoders.maxSyncLen > d.o.maxDecodedSize { - if debugDecoder { - println("maxSyncLen:", d.history.decoders.maxSyncLen, "> maxDecodedSize:", d.o.maxDecodedSize) - } - return dst, ErrDecoderSizeExceeded - } - if debugDecoder { - println("maxSyncLen:", d.history.decoders.maxSyncLen) - } - if !d.o.limitToCap && uint64(cap(dst)) < d.history.decoders.maxSyncLen { - // Alloc for output - dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc) - copy(dst2, dst) - dst = dst2 - } - } - var err error - for { - err = dec.reset(d.rawInput, d.WindowSize) - if err != nil { - break - } - if debugDecoder { - println("next block:", dec) - } - err = dec.decodeBuf(&d.history) - if err != nil { - break - } - if uint64(len(d.history.b)-crcStart) > d.o.maxDecodedSize { - println("runDecoder: maxDecodedSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.o.maxDecodedSize) - err = ErrDecoderSizeExceeded - break - } - if d.o.limitToCap && len(d.history.b) > cap(dst) { - println("runDecoder: cap exceeded", uint64(len(d.history.b)), ">", cap(dst)) - err = ErrDecoderSizeExceeded - break - } - if uint64(len(d.history.b)-crcStart) > d.FrameContentSize { - println("runDecoder: FrameContentSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize) - err = ErrFrameSizeExceeded - break - } - if dec.Last { - break - } - if debugDecoder { - println("runDecoder: FrameContentSize", uint64(len(d.history.b)-crcStart), "<=", d.FrameContentSize) - } - } - dst = d.history.b - if err == nil { - if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize { - err = ErrFrameSizeMismatch - } else if d.HasCheckSum { - if d.o.ignoreChecksum { - err = d.consumeCRC() - } else { - d.crc.Write(dst[crcStart:]) - err = d.checkCRC() - } - } - } - d.history.b = saved - return dst, err -} diff --git a/vendor/github.com/klauspost/compress/zstd/frameenc.go b/vendor/github.com/klauspost/compress/zstd/frameenc.go deleted file mode 100644 index 667ca0679..000000000 --- a/vendor/github.com/klauspost/compress/zstd/frameenc.go +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "encoding/binary" - "fmt" - "io" - "math" - "math/bits" -) - -type frameHeader struct { - ContentSize uint64 - WindowSize uint32 - SingleSegment bool - Checksum bool - DictID uint32 -} - -const maxHeaderSize = 14 - -func (f frameHeader) appendTo(dst []byte) []byte { - dst = append(dst, frameMagic...) - var fhd uint8 - if f.Checksum { - fhd |= 1 << 2 - } - if f.SingleSegment { - fhd |= 1 << 5 - } - - var dictIDContent []byte - if f.DictID > 0 { - var tmp [4]byte - if f.DictID < 256 { - fhd |= 1 - tmp[0] = uint8(f.DictID) - dictIDContent = tmp[:1] - } else if f.DictID < 1<<16 { - fhd |= 2 - binary.LittleEndian.PutUint16(tmp[:2], uint16(f.DictID)) - dictIDContent = tmp[:2] - } else { - fhd |= 3 - binary.LittleEndian.PutUint32(tmp[:4], f.DictID) - dictIDContent = tmp[:4] - } - } - var fcs uint8 - if f.ContentSize >= 256 { - fcs++ - } - if f.ContentSize >= 65536+256 { - fcs++ - } - if f.ContentSize >= 0xffffffff { - fcs++ - } - - fhd |= fcs << 6 - - dst = append(dst, fhd) - if !f.SingleSegment { - const winLogMin = 10 - windowLog := (bits.Len32(f.WindowSize-1) - winLogMin) << 3 - dst = append(dst, uint8(windowLog)) - } - if f.DictID > 0 { - dst = append(dst, dictIDContent...) - } - switch fcs { - case 0: - if f.SingleSegment { - dst = append(dst, uint8(f.ContentSize)) - } - // Unless SingleSegment is set, framessizes < 256 are not stored. - case 1: - f.ContentSize -= 256 - dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8)) - case 2: - dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8), uint8(f.ContentSize>>16), uint8(f.ContentSize>>24)) - case 3: - dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8), uint8(f.ContentSize>>16), uint8(f.ContentSize>>24), - uint8(f.ContentSize>>32), uint8(f.ContentSize>>40), uint8(f.ContentSize>>48), uint8(f.ContentSize>>56)) - default: - panic("invalid fcs") - } - return dst -} - -const skippableFrameHeader = 4 + 4 - -// calcSkippableFrame will return a total size to be added for written -// to be divisible by multiple. -// The value will always be > skippableFrameHeader. -// The function will panic if written < 0 or wantMultiple <= 0. -func calcSkippableFrame(written, wantMultiple int64) int { - if wantMultiple <= 0 { - panic("wantMultiple <= 0") - } - if written < 0 { - panic("written < 0") - } - leftOver := written % wantMultiple - if leftOver == 0 { - return 0 - } - toAdd := wantMultiple - leftOver - for toAdd < skippableFrameHeader { - toAdd += wantMultiple - } - return int(toAdd) -} - -// skippableFrame will add a skippable frame with a total size of bytes. -// total should be >= skippableFrameHeader and < math.MaxUint32. -func skippableFrame(dst []byte, total int, r io.Reader) ([]byte, error) { - if total == 0 { - return dst, nil - } - if total < skippableFrameHeader { - return dst, fmt.Errorf("requested skippable frame (%d) < 8", total) - } - if int64(total) > math.MaxUint32 { - return dst, fmt.Errorf("requested skippable frame (%d) > max uint32", total) - } - dst = append(dst, 0x50, 0x2a, 0x4d, 0x18) - f := uint32(total - skippableFrameHeader) - dst = append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)) - start := len(dst) - dst = append(dst, make([]byte, f)...) - _, err := io.ReadFull(r, dst[start:]) - return dst, err -} diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go deleted file mode 100644 index 2f8860a72..000000000 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go +++ /dev/null @@ -1,307 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "encoding/binary" - "errors" - "fmt" - "io" -) - -const ( - tablelogAbsoluteMax = 9 -) - -const ( - /*!MEMORY_USAGE : - * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) - * Increasing memory usage improves compression ratio - * Reduced memory usage can improve speed, due to cache effect - * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ - maxMemoryUsage = tablelogAbsoluteMax + 2 - - maxTableLog = maxMemoryUsage - 2 - maxTablesize = 1 << maxTableLog - maxTableMask = (1 << maxTableLog) - 1 - minTablelog = 5 - maxSymbolValue = 255 -) - -// fseDecoder provides temporary storage for compression and decompression. -type fseDecoder struct { - dt [maxTablesize]decSymbol // Decompression table. - symbolLen uint16 // Length of active part of the symbol table. - actualTableLog uint8 // Selected tablelog. - maxBits uint8 // Maximum number of additional bits - - // used for table creation to avoid allocations. - stateTable [256]uint16 - norm [maxSymbolValue + 1]int16 - preDefined bool -} - -// tableStep returns the next table index. -func tableStep(tableSize uint32) uint32 { - return (tableSize >> 1) + (tableSize >> 3) + 3 -} - -// readNCount will read the symbol distribution so decoding tables can be constructed. -func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { - var ( - charnum uint16 - previous0 bool - ) - if b.remain() < 4 { - return errors.New("input too small") - } - bitStream := b.Uint32NC() - nbBits := uint((bitStream & 0xF) + minTablelog) // extract tableLog - if nbBits > tablelogAbsoluteMax { - println("Invalid tablelog:", nbBits) - return errors.New("tableLog too large") - } - bitStream >>= 4 - bitCount := uint(4) - - s.actualTableLog = uint8(nbBits) - remaining := int32((1 << nbBits) + 1) - threshold := int32(1 << nbBits) - gotTotal := int32(0) - nbBits++ - - for remaining > 1 && charnum <= maxSymbol { - if previous0 { - //println("prev0") - n0 := charnum - for (bitStream & 0xFFFF) == 0xFFFF { - //println("24 x 0") - n0 += 24 - if r := b.remain(); r > 5 { - b.advance(2) - // The check above should make sure we can read 32 bits - bitStream = b.Uint32NC() >> bitCount - } else { - // end of bit stream - bitStream >>= 16 - bitCount += 16 - } - } - //printf("bitstream: %d, 0b%b", bitStream&3, bitStream) - for (bitStream & 3) == 3 { - n0 += 3 - bitStream >>= 2 - bitCount += 2 - } - n0 += uint16(bitStream & 3) - bitCount += 2 - - if n0 > maxSymbolValue { - return errors.New("maxSymbolValue too small") - } - //println("inserting ", n0-charnum, "zeroes from idx", charnum, "ending before", n0) - for charnum < n0 { - s.norm[uint8(charnum)] = 0 - charnum++ - } - - if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 { - b.advance(bitCount >> 3) - bitCount &= 7 - // The check above should make sure we can read 32 bits - bitStream = b.Uint32NC() >> bitCount - } else { - bitStream >>= 2 - } - } - - max := (2*threshold - 1) - remaining - var count int32 - - if int32(bitStream)&(threshold-1) < max { - count = int32(bitStream) & (threshold - 1) - if debugAsserts && nbBits < 1 { - panic("nbBits underflow") - } - bitCount += nbBits - 1 - } else { - count = int32(bitStream) & (2*threshold - 1) - if count >= threshold { - count -= max - } - bitCount += nbBits - } - - // extra accuracy - count-- - if count < 0 { - // -1 means +1 - remaining += count - gotTotal -= count - } else { - remaining -= count - gotTotal += count - } - s.norm[charnum&0xff] = int16(count) - charnum++ - previous0 = count == 0 - for remaining < threshold { - nbBits-- - threshold >>= 1 - } - - if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 { - b.advance(bitCount >> 3) - bitCount &= 7 - // The check above should make sure we can read 32 bits - bitStream = b.Uint32NC() >> (bitCount & 31) - } else { - bitCount -= (uint)(8 * (len(b.b) - 4 - b.off)) - b.off = len(b.b) - 4 - bitStream = b.Uint32() >> (bitCount & 31) - } - } - s.symbolLen = charnum - if s.symbolLen <= 1 { - return fmt.Errorf("symbolLen (%d) too small", s.symbolLen) - } - if s.symbolLen > maxSymbolValue+1 { - return fmt.Errorf("symbolLen (%d) too big", s.symbolLen) - } - if remaining != 1 { - return fmt.Errorf("corruption detected (remaining %d != 1)", remaining) - } - if bitCount > 32 { - return fmt.Errorf("corruption detected (bitCount %d > 32)", bitCount) - } - if gotTotal != 1<> 3) - return s.buildDtable() -} - -func (s *fseDecoder) mustReadFrom(r io.Reader) { - fatalErr := func(err error) { - if err != nil { - panic(err) - } - } - // dt [maxTablesize]decSymbol // Decompression table. - // symbolLen uint16 // Length of active part of the symbol table. - // actualTableLog uint8 // Selected tablelog. - // maxBits uint8 // Maximum number of additional bits - // // used for table creation to avoid allocations. - // stateTable [256]uint16 - // norm [maxSymbolValue + 1]int16 - // preDefined bool - fatalErr(binary.Read(r, binary.LittleEndian, &s.dt)) - fatalErr(binary.Read(r, binary.LittleEndian, &s.symbolLen)) - fatalErr(binary.Read(r, binary.LittleEndian, &s.actualTableLog)) - fatalErr(binary.Read(r, binary.LittleEndian, &s.maxBits)) - fatalErr(binary.Read(r, binary.LittleEndian, &s.stateTable)) - fatalErr(binary.Read(r, binary.LittleEndian, &s.norm)) - fatalErr(binary.Read(r, binary.LittleEndian, &s.preDefined)) -} - -// decSymbol contains information about a state entry, -// Including the state offset base, the output symbol and -// the number of bits to read for the low part of the destination state. -// Using a composite uint64 is faster than a struct with separate members. -type decSymbol uint64 - -func newDecSymbol(nbits, addBits uint8, newState uint16, baseline uint32) decSymbol { - return decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32) -} - -func (d decSymbol) nbBits() uint8 { - return uint8(d) -} - -func (d decSymbol) addBits() uint8 { - return uint8(d >> 8) -} - -func (d decSymbol) newState() uint16 { - return uint16(d >> 16) -} - -func (d decSymbol) baselineInt() int { - return int(d >> 32) -} - -func (d *decSymbol) setNBits(nBits uint8) { - const mask = 0xffffffffffffff00 - *d = (*d & mask) | decSymbol(nBits) -} - -func (d *decSymbol) setAddBits(addBits uint8) { - const mask = 0xffffffffffff00ff - *d = (*d & mask) | (decSymbol(addBits) << 8) -} - -func (d *decSymbol) setNewState(state uint16) { - const mask = 0xffffffff0000ffff - *d = (*d & mask) | decSymbol(state)<<16 -} - -func (d *decSymbol) setExt(addBits uint8, baseline uint32) { - const mask = 0xffff00ff - *d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32) -} - -// decSymbolValue returns the transformed decSymbol for the given symbol. -func decSymbolValue(symb uint8, t []baseOffset) (decSymbol, error) { - if int(symb) >= len(t) { - return 0, fmt.Errorf("rle symbol %d >= max %d", symb, len(t)) - } - lu := t[symb] - return newDecSymbol(0, lu.addBits, 0, lu.baseLine), nil -} - -// setRLE will set the decoder til RLE mode. -func (s *fseDecoder) setRLE(symbol decSymbol) { - s.actualTableLog = 0 - s.maxBits = symbol.addBits() - s.dt[0] = symbol -} - -// transform will transform the decoder table into a table usable for -// decoding without having to apply the transformation while decoding. -// The state will contain the base value and the number of bits to read. -func (s *fseDecoder) transform(t []baseOffset) error { - tableSize := uint16(1 << s.actualTableLog) - s.maxBits = 0 - for i, v := range s.dt[:tableSize] { - add := v.addBits() - if int(add) >= len(t) { - return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits(), len(t)) - } - lu := t[add] - if lu.addBits > s.maxBits { - s.maxBits = lu.addBits - } - v.setExt(lu.addBits, lu.baseLine) - s.dt[i] = v - } - return nil -} - -type fseState struct { - dt []decSymbol - state decSymbol -} - -// Initialize and decodeAsync first state and symbol. -func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) { - s.dt = dt - br.fill() - s.state = dt[br.getBits(tableLog)] -} - -// final returns the current state symbol without decoding the next. -func (s decSymbol) final() (int, uint8) { - return s.baselineInt(), s.addBits() -} diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go deleted file mode 100644 index d04a829b0..000000000 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go +++ /dev/null @@ -1,65 +0,0 @@ -//go:build amd64 && !appengine && !noasm && gc -// +build amd64,!appengine,!noasm,gc - -package zstd - -import ( - "fmt" -) - -type buildDtableAsmContext struct { - // inputs - stateTable *uint16 - norm *int16 - dt *uint64 - - // outputs --- set by the procedure in the case of error; - // for interpretation please see the error handling part below - errParam1 uint64 - errParam2 uint64 -} - -// buildDtable_asm is an x86 assembly implementation of fseDecoder.buildDtable. -// Function returns non-zero exit code on error. -// -//go:noescape -func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int - -// please keep in sync with _generate/gen_fse.go -const ( - errorCorruptedNormalizedCounter = 1 - errorNewStateTooBig = 2 - errorNewStateNoBits = 3 -) - -// buildDtable will build the decoding table. -func (s *fseDecoder) buildDtable() error { - ctx := buildDtableAsmContext{ - stateTable: &s.stateTable[0], - norm: &s.norm[0], - dt: (*uint64)(&s.dt[0]), - } - code := buildDtable_asm(s, &ctx) - - if code != 0 { - switch code { - case errorCorruptedNormalizedCounter: - position := ctx.errParam1 - return fmt.Errorf("corrupted input (position=%d, expected 0)", position) - - case errorNewStateTooBig: - newState := decSymbol(ctx.errParam1) - size := ctx.errParam2 - return fmt.Errorf("newState (%d) outside table size (%d)", newState, size) - - case errorNewStateNoBits: - newState := decSymbol(ctx.errParam1) - oldState := decSymbol(ctx.errParam2) - return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, oldState) - - default: - return fmt.Errorf("buildDtable_asm returned unhandled nonzero code = %d", code) - } - } - return nil -} diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s deleted file mode 100644 index bcde39869..000000000 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s +++ /dev/null @@ -1,126 +0,0 @@ -// Code generated by command: go run gen_fse.go -out ../fse_decoder_amd64.s -pkg=zstd. DO NOT EDIT. - -//go:build !appengine && !noasm && gc && !noasm - -// func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int -TEXT ·buildDtable_asm(SB), $0-24 - MOVQ ctx+8(FP), CX - MOVQ s+0(FP), DI - - // Load values - MOVBQZX 4098(DI), DX - XORQ AX, AX - BTSQ DX, AX - MOVQ (CX), BX - MOVQ 16(CX), SI - LEAQ -1(AX), R8 - MOVQ 8(CX), CX - MOVWQZX 4096(DI), DI - - // End load values - // Init, lay down lowprob symbols - XORQ R9, R9 - JMP init_main_loop_condition - -init_main_loop: - MOVWQSX (CX)(R9*2), R10 - CMPW R10, $-1 - JNE do_not_update_high_threshold - MOVB R9, 1(SI)(R8*8) - DECQ R8 - MOVQ $0x0000000000000001, R10 - -do_not_update_high_threshold: - MOVW R10, (BX)(R9*2) - INCQ R9 - -init_main_loop_condition: - CMPQ R9, DI - JL init_main_loop - - // Spread symbols - // Calculate table step - MOVQ AX, R9 - SHRQ $0x01, R9 - MOVQ AX, R10 - SHRQ $0x03, R10 - LEAQ 3(R9)(R10*1), R9 - - // Fill add bits values - LEAQ -1(AX), R10 - XORQ R11, R11 - XORQ R12, R12 - JMP spread_main_loop_condition - -spread_main_loop: - XORQ R13, R13 - MOVWQSX (CX)(R12*2), R14 - JMP spread_inner_loop_condition - -spread_inner_loop: - MOVB R12, 1(SI)(R11*8) - -adjust_position: - ADDQ R9, R11 - ANDQ R10, R11 - CMPQ R11, R8 - JG adjust_position - INCQ R13 - -spread_inner_loop_condition: - CMPQ R13, R14 - JL spread_inner_loop - INCQ R12 - -spread_main_loop_condition: - CMPQ R12, DI - JL spread_main_loop - TESTQ R11, R11 - JZ spread_check_ok - MOVQ ctx+8(FP), AX - MOVQ R11, 24(AX) - MOVQ $+1, ret+16(FP) - RET - -spread_check_ok: - // Build Decoding table - XORQ DI, DI - -build_table_main_table: - MOVBQZX 1(SI)(DI*8), CX - MOVWQZX (BX)(CX*2), R8 - LEAQ 1(R8), R9 - MOVW R9, (BX)(CX*2) - MOVQ R8, R9 - BSRQ R9, R9 - MOVQ DX, CX - SUBQ R9, CX - SHLQ CL, R8 - SUBQ AX, R8 - MOVB CL, (SI)(DI*8) - MOVW R8, 2(SI)(DI*8) - CMPQ R8, AX - JLE build_table_check1_ok - MOVQ ctx+8(FP), CX - MOVQ R8, 24(CX) - MOVQ AX, 32(CX) - MOVQ $+2, ret+16(FP) - RET - -build_table_check1_ok: - TESTB CL, CL - JNZ build_table_check2_ok - CMPW R8, DI - JNE build_table_check2_ok - MOVQ ctx+8(FP), AX - MOVQ R8, 24(AX) - MOVQ DI, 32(AX) - MOVQ $+3, ret+16(FP) - RET - -build_table_check2_ok: - INCQ DI - CMPQ DI, AX - JL build_table_main_table - MOVQ $+0, ret+16(FP) - RET diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go deleted file mode 100644 index 8adfebb02..000000000 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go +++ /dev/null @@ -1,73 +0,0 @@ -//go:build !amd64 || appengine || !gc || noasm -// +build !amd64 appengine !gc noasm - -package zstd - -import ( - "errors" - "fmt" -) - -// buildDtable will build the decoding table. -func (s *fseDecoder) buildDtable() error { - tableSize := uint32(1 << s.actualTableLog) - highThreshold := tableSize - 1 - symbolNext := s.stateTable[:256] - - // Init, lay down lowprob symbols - { - for i, v := range s.norm[:s.symbolLen] { - if v == -1 { - s.dt[highThreshold].setAddBits(uint8(i)) - highThreshold-- - v = 1 - } - symbolNext[i] = uint16(v) - } - } - - // Spread symbols - { - tableMask := tableSize - 1 - step := tableStep(tableSize) - position := uint32(0) - for ss, v := range s.norm[:s.symbolLen] { - for i := 0; i < int(v); i++ { - s.dt[position].setAddBits(uint8(ss)) - for { - // lowprob area - position = (position + step) & tableMask - if position <= highThreshold { - break - } - } - } - } - if position != 0 { - // position must reach all cells once, otherwise normalizedCounter is incorrect - return errors.New("corrupted input (position != 0)") - } - } - - // Build Decoding table - { - tableSize := uint16(1 << s.actualTableLog) - for u, v := range s.dt[:tableSize] { - symbol := v.addBits() - nextState := symbolNext[symbol] - symbolNext[symbol] = nextState + 1 - nBits := s.actualTableLog - byte(highBits(uint32(nextState))) - s.dt[u&maxTableMask].setNBits(nBits) - newState := (nextState << nBits) - tableSize - if newState > tableSize { - return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize) - } - if newState == uint16(u) && nBits == 0 { - // Seems weird that this is possible with nbits > 0. - return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u) - } - s.dt[u&maxTableMask].setNewState(newState) - } - } - return nil -} diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go deleted file mode 100644 index ab26326a8..000000000 --- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go +++ /dev/null @@ -1,701 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "errors" - "fmt" - "math" -) - -const ( - // For encoding we only support up to - maxEncTableLog = 8 - maxEncTablesize = 1 << maxTableLog - maxEncTableMask = (1 << maxTableLog) - 1 - minEncTablelog = 5 - maxEncSymbolValue = maxMatchLengthSymbol -) - -// Scratch provides temporary storage for compression and decompression. -type fseEncoder struct { - symbolLen uint16 // Length of active part of the symbol table. - actualTableLog uint8 // Selected tablelog. - ct cTable // Compression tables. - maxCount int // count of the most probable symbol - zeroBits bool // no bits has prob > 50%. - clearCount bool // clear count - useRLE bool // This encoder is for RLE - preDefined bool // This encoder is predefined. - reUsed bool // Set to know when the encoder has been reused. - rleVal uint8 // RLE Symbol - maxBits uint8 // Maximum output bits after transform. - - // TODO: Technically zstd should be fine with 64 bytes. - count [256]uint32 - norm [256]int16 -} - -// cTable contains tables used for compression. -type cTable struct { - tableSymbol []byte - stateTable []uint16 - symbolTT []symbolTransform -} - -// symbolTransform contains the state transform for a symbol. -type symbolTransform struct { - deltaNbBits uint32 - deltaFindState int16 - outBits uint8 -} - -// String prints values as a human readable string. -func (s symbolTransform) String() string { - return fmt.Sprintf("{deltabits: %08x, findstate:%d outbits:%d}", s.deltaNbBits, s.deltaFindState, s.outBits) -} - -// Histogram allows to populate the histogram and skip that step in the compression, -// It otherwise allows to inspect the histogram when compression is done. -// To indicate that you have populated the histogram call HistogramFinished -// with the value of the highest populated symbol, as well as the number of entries -// in the most populated entry. These are accepted at face value. -func (s *fseEncoder) Histogram() *[256]uint32 { - return &s.count -} - -// HistogramFinished can be called to indicate that the histogram has been populated. -// maxSymbol is the index of the highest set symbol of the next data segment. -// maxCount is the number of entries in the most populated entry. -// These are accepted at face value. -func (s *fseEncoder) HistogramFinished(maxSymbol uint8, maxCount int) { - s.maxCount = maxCount - s.symbolLen = uint16(maxSymbol) + 1 - s.clearCount = maxCount != 0 -} - -// allocCtable will allocate tables needed for compression. -// If existing tables a re big enough, they are simply re-used. -func (s *fseEncoder) allocCtable() { - tableSize := 1 << s.actualTableLog - // get tableSymbol that is big enough. - if cap(s.ct.tableSymbol) < tableSize { - s.ct.tableSymbol = make([]byte, tableSize) - } - s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] - - ctSize := tableSize - if cap(s.ct.stateTable) < ctSize { - s.ct.stateTable = make([]uint16, ctSize) - } - s.ct.stateTable = s.ct.stateTable[:ctSize] - - if cap(s.ct.symbolTT) < 256 { - s.ct.symbolTT = make([]symbolTransform, 256) - } - s.ct.symbolTT = s.ct.symbolTT[:256] -} - -// buildCTable will populate the compression table so it is ready to be used. -func (s *fseEncoder) buildCTable() error { - tableSize := uint32(1 << s.actualTableLog) - highThreshold := tableSize - 1 - var cumul [256]int16 - - s.allocCtable() - tableSymbol := s.ct.tableSymbol[:tableSize] - // symbol start positions - { - cumul[0] = 0 - for ui, v := range s.norm[:s.symbolLen-1] { - u := byte(ui) // one less than reference - if v == -1 { - // Low proba symbol - cumul[u+1] = cumul[u] + 1 - tableSymbol[highThreshold] = u - highThreshold-- - } else { - cumul[u+1] = cumul[u] + v - } - } - // Encode last symbol separately to avoid overflowing u - u := int(s.symbolLen - 1) - v := s.norm[s.symbolLen-1] - if v == -1 { - // Low proba symbol - cumul[u+1] = cumul[u] + 1 - tableSymbol[highThreshold] = byte(u) - highThreshold-- - } else { - cumul[u+1] = cumul[u] + v - } - if uint32(cumul[s.symbolLen]) != tableSize { - return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize) - } - cumul[s.symbolLen] = int16(tableSize) + 1 - } - // Spread symbols - s.zeroBits = false - { - step := tableStep(tableSize) - tableMask := tableSize - 1 - var position uint32 - // if any symbol > largeLimit, we may have 0 bits output. - largeLimit := int16(1 << (s.actualTableLog - 1)) - for ui, v := range s.norm[:s.symbolLen] { - symbol := byte(ui) - if v > largeLimit { - s.zeroBits = true - } - for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ { - tableSymbol[position] = symbol - position = (position + step) & tableMask - for position > highThreshold { - position = (position + step) & tableMask - } /* Low proba area */ - } - } - - // Check if we have gone through all positions - if position != 0 { - return errors.New("position!=0") - } - } - - // Build table - table := s.ct.stateTable - { - tsi := int(tableSize) - for u, v := range tableSymbol { - // TableU16 : sorted by symbol order; gives next state value - table[cumul[v]] = uint16(tsi + u) - cumul[v]++ - } - } - - // Build Symbol Transformation Table - { - total := int16(0) - symbolTT := s.ct.symbolTT[:s.symbolLen] - tableLog := s.actualTableLog - tl := (uint32(tableLog) << 16) - (1 << tableLog) - for i, v := range s.norm[:s.symbolLen] { - switch v { - case 0: - case -1, 1: - symbolTT[i].deltaNbBits = tl - symbolTT[i].deltaFindState = total - 1 - total++ - default: - maxBitsOut := uint32(tableLog) - highBit(uint32(v-1)) - minStatePlus := uint32(v) << maxBitsOut - symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus - symbolTT[i].deltaFindState = total - v - total += v - } - } - if total != int16(tableSize) { - return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize) - } - } - return nil -} - -var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000} - -func (s *fseEncoder) setRLE(val byte) { - s.allocCtable() - s.actualTableLog = 0 - s.ct.stateTable = s.ct.stateTable[:1] - s.ct.symbolTT[val] = symbolTransform{ - deltaFindState: 0, - deltaNbBits: 0, - } - if debugEncoder { - println("setRLE: val", val, "symbolTT", s.ct.symbolTT[val]) - } - s.rleVal = val - s.useRLE = true -} - -// setBits will set output bits for the transform. -// if nil is provided, the number of bits is equal to the index. -func (s *fseEncoder) setBits(transform []byte) { - if s.reUsed || s.preDefined { - return - } - if s.useRLE { - if transform == nil { - s.ct.symbolTT[s.rleVal].outBits = s.rleVal - s.maxBits = s.rleVal - return - } - s.maxBits = transform[s.rleVal] - s.ct.symbolTT[s.rleVal].outBits = s.maxBits - return - } - if transform == nil { - for i := range s.ct.symbolTT[:s.symbolLen] { - s.ct.symbolTT[i].outBits = uint8(i) - } - s.maxBits = uint8(s.symbolLen - 1) - return - } - s.maxBits = 0 - for i, v := range transform[:s.symbolLen] { - s.ct.symbolTT[i].outBits = v - if v > s.maxBits { - // We could assume bits always going up, but we play safe. - s.maxBits = v - } - } -} - -// normalizeCount will normalize the count of the symbols so -// the total is equal to the table size. -// If successful, compression tables will also be made ready. -func (s *fseEncoder) normalizeCount(length int) error { - if s.reUsed { - return nil - } - s.optimalTableLog(length) - var ( - tableLog = s.actualTableLog - scale = 62 - uint64(tableLog) - step = (1 << 62) / uint64(length) - vStep = uint64(1) << (scale - 20) - stillToDistribute = int16(1 << tableLog) - largest int - largestP int16 - lowThreshold = (uint32)(length >> tableLog) - ) - if s.maxCount == length { - s.useRLE = true - return nil - } - s.useRLE = false - for i, cnt := range s.count[:s.symbolLen] { - // already handled - // if (count[s] == s.length) return 0; /* rle special case */ - - if cnt == 0 { - s.norm[i] = 0 - continue - } - if cnt <= lowThreshold { - s.norm[i] = -1 - stillToDistribute-- - } else { - proba := (int16)((uint64(cnt) * step) >> scale) - if proba < 8 { - restToBeat := vStep * uint64(rtbTable[proba]) - v := uint64(cnt)*step - (uint64(proba) << scale) - if v > restToBeat { - proba++ - } - } - if proba > largestP { - largestP = proba - largest = i - } - s.norm[i] = proba - stillToDistribute -= proba - } - } - - if -stillToDistribute >= (s.norm[largest] >> 1) { - // corner case, need another normalization method - err := s.normalizeCount2(length) - if err != nil { - return err - } - if debugAsserts { - err = s.validateNorm() - if err != nil { - return err - } - } - return s.buildCTable() - } - s.norm[largest] += stillToDistribute - if debugAsserts { - err := s.validateNorm() - if err != nil { - return err - } - } - return s.buildCTable() -} - -// Secondary normalization method. -// To be used when primary method fails. -func (s *fseEncoder) normalizeCount2(length int) error { - const notYetAssigned = -2 - var ( - distributed uint32 - total = uint32(length) - tableLog = s.actualTableLog - lowThreshold = total >> tableLog - lowOne = (total * 3) >> (tableLog + 1) - ) - for i, cnt := range s.count[:s.symbolLen] { - if cnt == 0 { - s.norm[i] = 0 - continue - } - if cnt <= lowThreshold { - s.norm[i] = -1 - distributed++ - total -= cnt - continue - } - if cnt <= lowOne { - s.norm[i] = 1 - distributed++ - total -= cnt - continue - } - s.norm[i] = notYetAssigned - } - toDistribute := (1 << tableLog) - distributed - - if (total / toDistribute) > lowOne { - // risk of rounding to zero - lowOne = (total * 3) / (toDistribute * 2) - for i, cnt := range s.count[:s.symbolLen] { - if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { - s.norm[i] = 1 - distributed++ - total -= cnt - continue - } - } - toDistribute = (1 << tableLog) - distributed - } - if distributed == uint32(s.symbolLen)+1 { - // all values are pretty poor; - // probably incompressible data (should have already been detected); - // find max, then give all remaining points to max - var maxV int - var maxC uint32 - for i, cnt := range s.count[:s.symbolLen] { - if cnt > maxC { - maxV = i - maxC = cnt - } - } - s.norm[maxV] += int16(toDistribute) - return nil - } - - if total == 0 { - // all of the symbols were low enough for the lowOne or lowThreshold - for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) { - if s.norm[i] > 0 { - toDistribute-- - s.norm[i]++ - } - } - return nil - } - - var ( - vStepLog = 62 - uint64(tableLog) - mid = uint64((1 << (vStepLog - 1)) - 1) - rStep = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining - tmpTotal = mid - ) - for i, cnt := range s.count[:s.symbolLen] { - if s.norm[i] == notYetAssigned { - var ( - end = tmpTotal + uint64(cnt)*rStep - sStart = uint32(tmpTotal >> vStepLog) - sEnd = uint32(end >> vStepLog) - weight = sEnd - sStart - ) - if weight < 1 { - return errors.New("weight < 1") - } - s.norm[i] = int16(weight) - tmpTotal = end - } - } - return nil -} - -// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog -func (s *fseEncoder) optimalTableLog(length int) { - tableLog := uint8(maxEncTableLog) - minBitsSrc := highBit(uint32(length)) + 1 - minBitsSymbols := highBit(uint32(s.symbolLen-1)) + 2 - minBits := uint8(minBitsSymbols) - if minBitsSrc < minBitsSymbols { - minBits = uint8(minBitsSrc) - } - - maxBitsSrc := uint8(highBit(uint32(length-1))) - 2 - if maxBitsSrc < tableLog { - // Accuracy can be reduced - tableLog = maxBitsSrc - } - if minBits > tableLog { - tableLog = minBits - } - // Need a minimum to safely represent all symbol values - if tableLog < minEncTablelog { - tableLog = minEncTablelog - } - if tableLog > maxEncTableLog { - tableLog = maxEncTableLog - } - s.actualTableLog = tableLog -} - -// validateNorm validates the normalized histogram table. -func (s *fseEncoder) validateNorm() (err error) { - var total int - for _, v := range s.norm[:s.symbolLen] { - if v >= 0 { - total += int(v) - } else { - total -= int(v) - } - } - defer func() { - if err == nil { - return - } - fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen) - for i, v := range s.norm[:s.symbolLen] { - fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v) - } - }() - if total != (1 << s.actualTableLog) { - return fmt.Errorf("warning: Total == %d != %d", total, 1<> 3) + 3 + 2 - - // Write Table Size - bitStream = uint32(tableLog - minEncTablelog) - bitCount = uint(4) - remaining = int16(tableSize + 1) /* +1 for extra accuracy */ - threshold = int16(tableSize) - nbBits = uint(tableLog + 1) - outP = len(out) - ) - if cap(out) < outP+maxHeaderSize { - out = append(out, make([]byte, maxHeaderSize*3)...) - out = out[:len(out)-maxHeaderSize*3] - } - out = out[:outP+maxHeaderSize] - - // stops at 1 - for remaining > 1 { - if previous0 { - start := charnum - for s.norm[charnum] == 0 { - charnum++ - } - for charnum >= start+24 { - start += 24 - bitStream += uint32(0xFFFF) << bitCount - out[outP] = byte(bitStream) - out[outP+1] = byte(bitStream >> 8) - outP += 2 - bitStream >>= 16 - } - for charnum >= start+3 { - start += 3 - bitStream += 3 << bitCount - bitCount += 2 - } - bitStream += uint32(charnum-start) << bitCount - bitCount += 2 - if bitCount > 16 { - out[outP] = byte(bitStream) - out[outP+1] = byte(bitStream >> 8) - outP += 2 - bitStream >>= 16 - bitCount -= 16 - } - } - - count := s.norm[charnum] - charnum++ - max := (2*threshold - 1) - remaining - if count < 0 { - remaining += count - } else { - remaining -= count - } - count++ // +1 for extra accuracy - if count >= threshold { - count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ - } - bitStream += uint32(count) << bitCount - bitCount += nbBits - if count < max { - bitCount-- - } - - previous0 = count == 1 - if remaining < 1 { - return nil, errors.New("internal error: remaining < 1") - } - for remaining < threshold { - nbBits-- - threshold >>= 1 - } - - if bitCount > 16 { - out[outP] = byte(bitStream) - out[outP+1] = byte(bitStream >> 8) - outP += 2 - bitStream >>= 16 - bitCount -= 16 - } - } - - if outP+2 > len(out) { - return nil, fmt.Errorf("internal error: %d > %d, maxheader: %d, sl: %d, tl: %d, normcount: %v", outP+2, len(out), maxHeaderSize, s.symbolLen, int(tableLog), s.norm[:s.symbolLen]) - } - out[outP] = byte(bitStream) - out[outP+1] = byte(bitStream >> 8) - outP += int((bitCount + 7) / 8) - - if charnum > s.symbolLen { - return nil, errors.New("internal error: charnum > s.symbolLen") - } - return out[:outP], nil -} - -// Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) -// note 1 : assume symbolValue is valid (<= maxSymbolValue) -// note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits * -func (s *fseEncoder) bitCost(symbolValue uint8, accuracyLog uint32) uint32 { - minNbBits := s.ct.symbolTT[symbolValue].deltaNbBits >> 16 - threshold := (minNbBits + 1) << 16 - if debugAsserts { - if !(s.actualTableLog < 16) { - panic("!s.actualTableLog < 16") - } - // ensure enough room for renormalization double shift - if !(uint8(accuracyLog) < 31-s.actualTableLog) { - panic("!uint8(accuracyLog) < 31-s.actualTableLog") - } - } - tableSize := uint32(1) << s.actualTableLog - deltaFromThreshold := threshold - (s.ct.symbolTT[symbolValue].deltaNbBits + tableSize) - // linear interpolation (very approximate) - normalizedDeltaFromThreshold := (deltaFromThreshold << accuracyLog) >> s.actualTableLog - bitMultiplier := uint32(1) << accuracyLog - if debugAsserts { - if s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold { - panic("s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold") - } - if normalizedDeltaFromThreshold > bitMultiplier { - panic("normalizedDeltaFromThreshold > bitMultiplier") - } - } - return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold -} - -// Returns the cost in bits of encoding the distribution in count using ctable. -// Histogram should only be up to the last non-zero symbol. -// Returns an -1 if ctable cannot represent all the symbols in count. -func (s *fseEncoder) approxSize(hist []uint32) uint32 { - if int(s.symbolLen) < len(hist) { - // More symbols than we have. - return math.MaxUint32 - } - if s.useRLE { - // We will never reuse RLE encoders. - return math.MaxUint32 - } - const kAccuracyLog = 8 - badCost := (uint32(s.actualTableLog) + 1) << kAccuracyLog - var cost uint32 - for i, v := range hist { - if v == 0 { - continue - } - if s.norm[i] == 0 { - return math.MaxUint32 - } - bitCost := s.bitCost(uint8(i), kAccuracyLog) - if bitCost > badCost { - return math.MaxUint32 - } - cost += v * bitCost - } - return cost >> kAccuracyLog -} - -// maxHeaderSize returns the maximum header size in bits. -// This is not exact size, but we want a penalty for new tables anyway. -func (s *fseEncoder) maxHeaderSize() uint32 { - if s.preDefined { - return 0 - } - if s.useRLE { - return 8 - } - return (((uint32(s.symbolLen) * uint32(s.actualTableLog)) >> 3) + 3) * 8 -} - -// cState contains the compression state of a stream. -type cState struct { - bw *bitWriter - stateTable []uint16 - state uint16 -} - -// init will initialize the compression state to the first symbol of the stream. -func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) { - c.bw = bw - c.stateTable = ct.stateTable - if len(c.stateTable) == 1 { - // RLE - c.stateTable[0] = uint16(0) - c.state = 0 - return - } - nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16 - im := int32((nbBitsOut << 16) - first.deltaNbBits) - lu := (im >> nbBitsOut) + int32(first.deltaFindState) - c.state = c.stateTable[lu] -} - -// flush will write the tablelog to the output and flush the remaining full bytes. -func (c *cState) flush(tableLog uint8) { - c.bw.flush32() - c.bw.addBits16NC(c.state, tableLog) -} diff --git a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go deleted file mode 100644 index 474cb77d2..000000000 --- a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "fmt" - "math" - "sync" -) - -var ( - // fsePredef are the predefined fse tables as defined here: - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions - // These values are already transformed. - fsePredef [3]fseDecoder - - // fsePredefEnc are the predefined encoder based on fse tables as defined here: - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions - // These values are already transformed. - fsePredefEnc [3]fseEncoder - - // symbolTableX contain the transformations needed for each type as defined in - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#the-codes-for-literals-lengths-match-lengths-and-offsets - symbolTableX [3][]baseOffset - - // maxTableSymbol is the biggest supported symbol for each table type - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#the-codes-for-literals-lengths-match-lengths-and-offsets - maxTableSymbol = [3]uint8{tableLiteralLengths: maxLiteralLengthSymbol, tableOffsets: maxOffsetLengthSymbol, tableMatchLengths: maxMatchLengthSymbol} - - // bitTables is the bits table for each table. - bitTables = [3][]byte{tableLiteralLengths: llBitsTable[:], tableOffsets: nil, tableMatchLengths: mlBitsTable[:]} -) - -type tableIndex uint8 - -const ( - // indexes for fsePredef and symbolTableX - tableLiteralLengths tableIndex = 0 - tableOffsets tableIndex = 1 - tableMatchLengths tableIndex = 2 - - maxLiteralLengthSymbol = 35 - maxOffsetLengthSymbol = 30 - maxMatchLengthSymbol = 52 -) - -// baseOffset is used for calculating transformations. -type baseOffset struct { - baseLine uint32 - addBits uint8 -} - -// fillBase will precalculate base offsets with the given bit distributions. -func fillBase(dst []baseOffset, base uint32, bits ...uint8) { - if len(bits) != len(dst) { - panic(fmt.Sprintf("len(dst) (%d) != len(bits) (%d)", len(dst), len(bits))) - } - for i, bit := range bits { - if base > math.MaxInt32 { - panic("invalid decoding table, base overflows int32") - } - - dst[i] = baseOffset{ - baseLine: base, - addBits: bit, - } - base += 1 << bit - } -} - -var predef sync.Once - -func initPredefined() { - predef.Do(func() { - // Literals length codes - tmp := make([]baseOffset, 36) - for i := range tmp[:16] { - tmp[i] = baseOffset{ - baseLine: uint32(i), - addBits: 0, - } - } - fillBase(tmp[16:], 16, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - symbolTableX[tableLiteralLengths] = tmp - - // Match length codes - tmp = make([]baseOffset, 53) - for i := range tmp[:32] { - tmp[i] = baseOffset{ - // The transformation adds the 3 length. - baseLine: uint32(i) + 3, - addBits: 0, - } - } - fillBase(tmp[32:], 35, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - symbolTableX[tableMatchLengths] = tmp - - // Offset codes - tmp = make([]baseOffset, maxOffsetBits+1) - tmp[1] = baseOffset{ - baseLine: 1, - addBits: 1, - } - fillBase(tmp[2:], 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30) - symbolTableX[tableOffsets] = tmp - - // Fill predefined tables and transform them. - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions - for i := range fsePredef[:] { - f := &fsePredef[i] - switch tableIndex(i) { - case tableLiteralLengths: - // https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L243 - f.actualTableLog = 6 - copy(f.norm[:], []int16{4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, - -1, -1, -1, -1}) - f.symbolLen = 36 - case tableOffsets: - // https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L281 - f.actualTableLog = 5 - copy(f.norm[:], []int16{ - 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1}) - f.symbolLen = 29 - case tableMatchLengths: - //https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L304 - f.actualTableLog = 6 - copy(f.norm[:], []int16{ - 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, - -1, -1, -1, -1, -1}) - f.symbolLen = 53 - } - if err := f.buildDtable(); err != nil { - panic(fmt.Errorf("building table %v: %v", tableIndex(i), err)) - } - if err := f.transform(symbolTableX[i]); err != nil { - panic(fmt.Errorf("building table %v: %v", tableIndex(i), err)) - } - f.preDefined = true - - // Create encoder as well - enc := &fsePredefEnc[i] - copy(enc.norm[:], f.norm[:]) - enc.symbolLen = f.symbolLen - enc.actualTableLog = f.actualTableLog - if err := enc.buildCTable(); err != nil { - panic(fmt.Errorf("building encoding table %v: %v", tableIndex(i), err)) - } - enc.setBits(bitTables[i]) - enc.preDefined = true - } - }) -} diff --git a/vendor/github.com/klauspost/compress/zstd/hash.go b/vendor/github.com/klauspost/compress/zstd/hash.go deleted file mode 100644 index 5d73c21eb..000000000 --- a/vendor/github.com/klauspost/compress/zstd/hash.go +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -const ( - prime3bytes = 506832829 - prime4bytes = 2654435761 - prime5bytes = 889523592379 - prime6bytes = 227718039650203 - prime7bytes = 58295818150454627 - prime8bytes = 0xcf1bbcdcb7a56463 -) - -// hashLen returns a hash of the lowest mls bytes of with length output bits. -// mls must be >=3 and <=8. Any other value will return hash for 4 bytes. -// length should always be < 32. -// Preferably length and mls should be a constant for inlining. -func hashLen(u uint64, length, mls uint8) uint32 { - switch mls { - case 3: - return (uint32(u<<8) * prime3bytes) >> (32 - length) - case 5: - return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length)) - case 6: - return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length)) - case 7: - return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length)) - case 8: - return uint32((u * prime8bytes) >> (64 - length)) - default: - return (uint32(u) * prime4bytes) >> (32 - length) - } -} diff --git a/vendor/github.com/klauspost/compress/zstd/history.go b/vendor/github.com/klauspost/compress/zstd/history.go deleted file mode 100644 index 09164856d..000000000 --- a/vendor/github.com/klauspost/compress/zstd/history.go +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "github.com/klauspost/compress/huff0" -) - -// history contains the information transferred between blocks. -type history struct { - // Literal decompression - huffTree *huff0.Scratch - - // Sequence decompression - decoders sequenceDecs - recentOffsets [3]int - - // History buffer... - b []byte - - // ignoreBuffer is meant to ignore a number of bytes - // when checking for matches in history - ignoreBuffer int - - windowSize int - allocFrameBuffer int // needed? - error bool - dict *dict -} - -// reset will reset the history to initial state of a frame. -// The history must already have been initialized to the desired size. -func (h *history) reset() { - h.b = h.b[:0] - h.ignoreBuffer = 0 - h.error = false - h.recentOffsets = [3]int{1, 4, 8} - h.decoders.freeDecoders() - h.decoders = sequenceDecs{br: h.decoders.br} - h.freeHuffDecoder() - h.huffTree = nil - h.dict = nil - //printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b)) -} - -func (h *history) freeHuffDecoder() { - if h.huffTree != nil { - if h.dict == nil || h.dict.litEnc != h.huffTree { - huffDecoderPool.Put(h.huffTree) - h.huffTree = nil - } - } -} - -func (h *history) setDict(dict *dict) { - if dict == nil { - return - } - h.dict = dict - h.decoders.litLengths = dict.llDec - h.decoders.offsets = dict.ofDec - h.decoders.matchLengths = dict.mlDec - h.decoders.dict = dict.content - h.recentOffsets = dict.offsets - h.huffTree = dict.litEnc -} - -// append bytes to history. -// This function will make sure there is space for it, -// if the buffer has been allocated with enough extra space. -func (h *history) append(b []byte) { - if len(b) >= h.windowSize { - // Discard all history by simply overwriting - h.b = h.b[:h.windowSize] - copy(h.b, b[len(b)-h.windowSize:]) - return - } - - // If there is space, append it. - if len(b) < cap(h.b)-len(h.b) { - h.b = append(h.b, b...) - return - } - - // Move data down so we only have window size left. - // We know we have less than window size in b at this point. - discard := len(b) + len(h.b) - h.windowSize - copy(h.b, h.b[discard:]) - h.b = h.b[:h.windowSize] - copy(h.b[h.windowSize-len(b):], b) -} - -// ensureBlock will ensure there is space for at least one block... -func (h *history) ensureBlock() { - if cap(h.b) < h.allocFrameBuffer { - h.b = make([]byte, 0, h.allocFrameBuffer) - return - } - - avail := cap(h.b) - len(h.b) - if avail >= h.windowSize || avail > maxCompressedBlockSize { - return - } - // Move data down so we only have window size left. - // We know we have less than window size in b at this point. - discard := len(h.b) - h.windowSize - copy(h.b, h.b[discard:]) - h.b = h.b[:h.windowSize] -} - -// append bytes to history without ever discarding anything. -func (h *history) appendKeep(b []byte) { - h.b = append(h.b, b...) -} diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/LICENSE.txt b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/LICENSE.txt deleted file mode 100644 index 24b53065f..000000000 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/LICENSE.txt +++ /dev/null @@ -1,22 +0,0 @@ -Copyright (c) 2016 Caleb Spare - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md deleted file mode 100644 index 777290d44..000000000 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md +++ /dev/null @@ -1,71 +0,0 @@ -# xxhash - -VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package. - -xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a -high-quality hashing algorithm that is much faster than anything in the Go -standard library. - -This package provides a straightforward API: - -``` -func Sum64(b []byte) uint64 -func Sum64String(s string) uint64 -type Digest struct{ ... } - func New() *Digest -``` - -The `Digest` type implements hash.Hash64. Its key methods are: - -``` -func (*Digest) Write([]byte) (int, error) -func (*Digest) WriteString(string) (int, error) -func (*Digest) Sum64() uint64 -``` - -The package is written with optimized pure Go and also contains even faster -assembly implementations for amd64 and arm64. If desired, the `purego` build tag -opts into using the Go code even on those architectures. - -[xxHash]: http://cyan4973.github.io/xxHash/ - -## Compatibility - -This package is in a module and the latest code is in version 2 of the module. -You need a version of Go with at least "minimal module compatibility" to use -github.com/cespare/xxhash/v2: - -* 1.9.7+ for Go 1.9 -* 1.10.3+ for Go 1.10 -* Go 1.11 or later - -I recommend using the latest release of Go. - -## Benchmarks - -Here are some quick benchmarks comparing the pure-Go and assembly -implementations of Sum64. - -| input size | purego | asm | -| ---------- | --------- | --------- | -| 4 B | 1.3 GB/s | 1.2 GB/s | -| 16 B | 2.9 GB/s | 3.5 GB/s | -| 100 B | 6.9 GB/s | 8.1 GB/s | -| 4 KB | 11.7 GB/s | 16.7 GB/s | -| 10 MB | 12.0 GB/s | 17.3 GB/s | - -These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C -CPU using the following commands under Go 1.19.2: - -``` -benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$') -benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$') -``` - -## Projects using this package - -- [InfluxDB](https://github.com/influxdata/influxdb) -- [Prometheus](https://github.com/prometheus/prometheus) -- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) -- [FreeCache](https://github.com/coocood/freecache) -- [FastCache](https://github.com/VictoriaMetrics/fastcache) diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go deleted file mode 100644 index fc40c8200..000000000 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go +++ /dev/null @@ -1,230 +0,0 @@ -// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described -// at http://cyan4973.github.io/xxHash/. -// THIS IS VENDORED: Go to github.com/cespare/xxhash for original package. - -package xxhash - -import ( - "encoding/binary" - "errors" - "math/bits" -) - -const ( - prime1 uint64 = 11400714785074694791 - prime2 uint64 = 14029467366897019727 - prime3 uint64 = 1609587929392839161 - prime4 uint64 = 9650029242287828579 - prime5 uint64 = 2870177450012600261 -) - -// Store the primes in an array as well. -// -// The consts are used when possible in Go code to avoid MOVs but we need a -// contiguous array of the assembly code. -var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5} - -// Digest implements hash.Hash64. -type Digest struct { - v1 uint64 - v2 uint64 - v3 uint64 - v4 uint64 - total uint64 - mem [32]byte - n int // how much of mem is used -} - -// New creates a new Digest that computes the 64-bit xxHash algorithm. -func New() *Digest { - var d Digest - d.Reset() - return &d -} - -// Reset clears the Digest's state so that it can be reused. -func (d *Digest) Reset() { - d.v1 = primes[0] + prime2 - d.v2 = prime2 - d.v3 = 0 - d.v4 = -primes[0] - d.total = 0 - d.n = 0 -} - -// Size always returns 8 bytes. -func (d *Digest) Size() int { return 8 } - -// BlockSize always returns 32 bytes. -func (d *Digest) BlockSize() int { return 32 } - -// Write adds more data to d. It always returns len(b), nil. -func (d *Digest) Write(b []byte) (n int, err error) { - n = len(b) - d.total += uint64(n) - - memleft := d.mem[d.n&(len(d.mem)-1):] - - if d.n+n < 32 { - // This new data doesn't even fill the current block. - copy(memleft, b) - d.n += n - return - } - - if d.n > 0 { - // Finish off the partial block. - c := copy(memleft, b) - d.v1 = round(d.v1, u64(d.mem[0:8])) - d.v2 = round(d.v2, u64(d.mem[8:16])) - d.v3 = round(d.v3, u64(d.mem[16:24])) - d.v4 = round(d.v4, u64(d.mem[24:32])) - b = b[c:] - d.n = 0 - } - - if len(b) >= 32 { - // One or more full blocks left. - nw := writeBlocks(d, b) - b = b[nw:] - } - - // Store any remaining partial block. - copy(d.mem[:], b) - d.n = len(b) - - return -} - -// Sum appends the current hash to b and returns the resulting slice. -func (d *Digest) Sum(b []byte) []byte { - s := d.Sum64() - return append( - b, - byte(s>>56), - byte(s>>48), - byte(s>>40), - byte(s>>32), - byte(s>>24), - byte(s>>16), - byte(s>>8), - byte(s), - ) -} - -// Sum64 returns the current hash. -func (d *Digest) Sum64() uint64 { - var h uint64 - - if d.total >= 32 { - v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4 - h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) - h = mergeRound(h, v1) - h = mergeRound(h, v2) - h = mergeRound(h, v3) - h = mergeRound(h, v4) - } else { - h = d.v3 + prime5 - } - - h += d.total - - b := d.mem[:d.n&(len(d.mem)-1)] - for ; len(b) >= 8; b = b[8:] { - k1 := round(0, u64(b[:8])) - h ^= k1 - h = rol27(h)*prime1 + prime4 - } - if len(b) >= 4 { - h ^= uint64(u32(b[:4])) * prime1 - h = rol23(h)*prime2 + prime3 - b = b[4:] - } - for ; len(b) > 0; b = b[1:] { - h ^= uint64(b[0]) * prime5 - h = rol11(h) * prime1 - } - - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 - - return h -} - -const ( - magic = "xxh\x06" - marshaledSize = len(magic) + 8*5 + 32 -) - -// MarshalBinary implements the encoding.BinaryMarshaler interface. -func (d *Digest) MarshalBinary() ([]byte, error) { - b := make([]byte, 0, marshaledSize) - b = append(b, magic...) - b = appendUint64(b, d.v1) - b = appendUint64(b, d.v2) - b = appendUint64(b, d.v3) - b = appendUint64(b, d.v4) - b = appendUint64(b, d.total) - b = append(b, d.mem[:d.n]...) - b = b[:len(b)+len(d.mem)-d.n] - return b, nil -} - -// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface. -func (d *Digest) UnmarshalBinary(b []byte) error { - if len(b) < len(magic) || string(b[:len(magic)]) != magic { - return errors.New("xxhash: invalid hash state identifier") - } - if len(b) != marshaledSize { - return errors.New("xxhash: invalid hash state size") - } - b = b[len(magic):] - b, d.v1 = consumeUint64(b) - b, d.v2 = consumeUint64(b) - b, d.v3 = consumeUint64(b) - b, d.v4 = consumeUint64(b) - b, d.total = consumeUint64(b) - copy(d.mem[:], b) - d.n = int(d.total % uint64(len(d.mem))) - return nil -} - -func appendUint64(b []byte, x uint64) []byte { - var a [8]byte - binary.LittleEndian.PutUint64(a[:], x) - return append(b, a[:]...) -} - -func consumeUint64(b []byte) ([]byte, uint64) { - x := u64(b) - return b[8:], x -} - -func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) } -func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) } - -func round(acc, input uint64) uint64 { - acc += input * prime2 - acc = rol31(acc) - acc *= prime1 - return acc -} - -func mergeRound(acc, val uint64) uint64 { - val = round(0, val) - acc ^= val - acc = acc*prime1 + prime4 - return acc -} - -func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) } -func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) } -func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) } -func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) } -func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) } -func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) } -func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) } -func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) } diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s deleted file mode 100644 index ddb63aa91..000000000 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s +++ /dev/null @@ -1,210 +0,0 @@ -//go:build !appengine && gc && !purego && !noasm -// +build !appengine -// +build gc -// +build !purego -// +build !noasm - -#include "textflag.h" - -// Registers: -#define h AX -#define d AX -#define p SI // pointer to advance through b -#define n DX -#define end BX // loop end -#define v1 R8 -#define v2 R9 -#define v3 R10 -#define v4 R11 -#define x R12 -#define prime1 R13 -#define prime2 R14 -#define prime4 DI - -#define round(acc, x) \ - IMULQ prime2, x \ - ADDQ x, acc \ - ROLQ $31, acc \ - IMULQ prime1, acc - -// round0 performs the operation x = round(0, x). -#define round0(x) \ - IMULQ prime2, x \ - ROLQ $31, x \ - IMULQ prime1, x - -// mergeRound applies a merge round on the two registers acc and x. -// It assumes that prime1, prime2, and prime4 have been loaded. -#define mergeRound(acc, x) \ - round0(x) \ - XORQ x, acc \ - IMULQ prime1, acc \ - ADDQ prime4, acc - -// blockLoop processes as many 32-byte blocks as possible, -// updating v1, v2, v3, and v4. It assumes that there is at least one block -// to process. -#define blockLoop() \ -loop: \ - MOVQ +0(p), x \ - round(v1, x) \ - MOVQ +8(p), x \ - round(v2, x) \ - MOVQ +16(p), x \ - round(v3, x) \ - MOVQ +24(p), x \ - round(v4, x) \ - ADDQ $32, p \ - CMPQ p, end \ - JLE loop - -// func Sum64(b []byte) uint64 -TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 - // Load fixed primes. - MOVQ ·primes+0(SB), prime1 - MOVQ ·primes+8(SB), prime2 - MOVQ ·primes+24(SB), prime4 - - // Load slice. - MOVQ b_base+0(FP), p - MOVQ b_len+8(FP), n - LEAQ (p)(n*1), end - - // The first loop limit will be len(b)-32. - SUBQ $32, end - - // Check whether we have at least one block. - CMPQ n, $32 - JLT noBlocks - - // Set up initial state (v1, v2, v3, v4). - MOVQ prime1, v1 - ADDQ prime2, v1 - MOVQ prime2, v2 - XORQ v3, v3 - XORQ v4, v4 - SUBQ prime1, v4 - - blockLoop() - - MOVQ v1, h - ROLQ $1, h - MOVQ v2, x - ROLQ $7, x - ADDQ x, h - MOVQ v3, x - ROLQ $12, x - ADDQ x, h - MOVQ v4, x - ROLQ $18, x - ADDQ x, h - - mergeRound(h, v1) - mergeRound(h, v2) - mergeRound(h, v3) - mergeRound(h, v4) - - JMP afterBlocks - -noBlocks: - MOVQ ·primes+32(SB), h - -afterBlocks: - ADDQ n, h - - ADDQ $24, end - CMPQ p, end - JG try4 - -loop8: - MOVQ (p), x - ADDQ $8, p - round0(x) - XORQ x, h - ROLQ $27, h - IMULQ prime1, h - ADDQ prime4, h - - CMPQ p, end - JLE loop8 - -try4: - ADDQ $4, end - CMPQ p, end - JG try1 - - MOVL (p), x - ADDQ $4, p - IMULQ prime1, x - XORQ x, h - - ROLQ $23, h - IMULQ prime2, h - ADDQ ·primes+16(SB), h - -try1: - ADDQ $4, end - CMPQ p, end - JGE finalize - -loop1: - MOVBQZX (p), x - ADDQ $1, p - IMULQ ·primes+32(SB), x - XORQ x, h - ROLQ $11, h - IMULQ prime1, h - - CMPQ p, end - JL loop1 - -finalize: - MOVQ h, x - SHRQ $33, x - XORQ x, h - IMULQ prime2, h - MOVQ h, x - SHRQ $29, x - XORQ x, h - IMULQ ·primes+16(SB), h - MOVQ h, x - SHRQ $32, x - XORQ x, h - - MOVQ h, ret+24(FP) - RET - -// func writeBlocks(d *Digest, b []byte) int -TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 - // Load fixed primes needed for round. - MOVQ ·primes+0(SB), prime1 - MOVQ ·primes+8(SB), prime2 - - // Load slice. - MOVQ b_base+8(FP), p - MOVQ b_len+16(FP), n - LEAQ (p)(n*1), end - SUBQ $32, end - - // Load vN from d. - MOVQ s+0(FP), d - MOVQ 0(d), v1 - MOVQ 8(d), v2 - MOVQ 16(d), v3 - MOVQ 24(d), v4 - - // We don't need to check the loop condition here; this function is - // always called with at least one block of data to process. - blockLoop() - - // Copy vN back to d. - MOVQ v1, 0(d) - MOVQ v2, 8(d) - MOVQ v3, 16(d) - MOVQ v4, 24(d) - - // The number of bytes written is p minus the old base pointer. - SUBQ b_base+8(FP), p - MOVQ p, ret+32(FP) - - RET diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s deleted file mode 100644 index ae7d4d329..000000000 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s +++ /dev/null @@ -1,184 +0,0 @@ -//go:build !appengine && gc && !purego && !noasm -// +build !appengine -// +build gc -// +build !purego -// +build !noasm - -#include "textflag.h" - -// Registers: -#define digest R1 -#define h R2 // return value -#define p R3 // input pointer -#define n R4 // input length -#define nblocks R5 // n / 32 -#define prime1 R7 -#define prime2 R8 -#define prime3 R9 -#define prime4 R10 -#define prime5 R11 -#define v1 R12 -#define v2 R13 -#define v3 R14 -#define v4 R15 -#define x1 R20 -#define x2 R21 -#define x3 R22 -#define x4 R23 - -#define round(acc, x) \ - MADD prime2, acc, x, acc \ - ROR $64-31, acc \ - MUL prime1, acc - -// round0 performs the operation x = round(0, x). -#define round0(x) \ - MUL prime2, x \ - ROR $64-31, x \ - MUL prime1, x - -#define mergeRound(acc, x) \ - round0(x) \ - EOR x, acc \ - MADD acc, prime4, prime1, acc - -// blockLoop processes as many 32-byte blocks as possible, -// updating v1, v2, v3, and v4. It assumes that n >= 32. -#define blockLoop() \ - LSR $5, n, nblocks \ - PCALIGN $16 \ - loop: \ - LDP.P 16(p), (x1, x2) \ - LDP.P 16(p), (x3, x4) \ - round(v1, x1) \ - round(v2, x2) \ - round(v3, x3) \ - round(v4, x4) \ - SUB $1, nblocks \ - CBNZ nblocks, loop - -// func Sum64(b []byte) uint64 -TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 - LDP b_base+0(FP), (p, n) - - LDP ·primes+0(SB), (prime1, prime2) - LDP ·primes+16(SB), (prime3, prime4) - MOVD ·primes+32(SB), prime5 - - CMP $32, n - CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 } - BLT afterLoop - - ADD prime1, prime2, v1 - MOVD prime2, v2 - MOVD $0, v3 - NEG prime1, v4 - - blockLoop() - - ROR $64-1, v1, x1 - ROR $64-7, v2, x2 - ADD x1, x2 - ROR $64-12, v3, x3 - ROR $64-18, v4, x4 - ADD x3, x4 - ADD x2, x4, h - - mergeRound(h, v1) - mergeRound(h, v2) - mergeRound(h, v3) - mergeRound(h, v4) - -afterLoop: - ADD n, h - - TBZ $4, n, try8 - LDP.P 16(p), (x1, x2) - - round0(x1) - - // NOTE: here and below, sequencing the EOR after the ROR (using a - // rotated register) is worth a small but measurable speedup for small - // inputs. - ROR $64-27, h - EOR x1 @> 64-27, h, h - MADD h, prime4, prime1, h - - round0(x2) - ROR $64-27, h - EOR x2 @> 64-27, h, h - MADD h, prime4, prime1, h - -try8: - TBZ $3, n, try4 - MOVD.P 8(p), x1 - - round0(x1) - ROR $64-27, h - EOR x1 @> 64-27, h, h - MADD h, prime4, prime1, h - -try4: - TBZ $2, n, try2 - MOVWU.P 4(p), x2 - - MUL prime1, x2 - ROR $64-23, h - EOR x2 @> 64-23, h, h - MADD h, prime3, prime2, h - -try2: - TBZ $1, n, try1 - MOVHU.P 2(p), x3 - AND $255, x3, x1 - LSR $8, x3, x2 - - MUL prime5, x1 - ROR $64-11, h - EOR x1 @> 64-11, h, h - MUL prime1, h - - MUL prime5, x2 - ROR $64-11, h - EOR x2 @> 64-11, h, h - MUL prime1, h - -try1: - TBZ $0, n, finalize - MOVBU (p), x4 - - MUL prime5, x4 - ROR $64-11, h - EOR x4 @> 64-11, h, h - MUL prime1, h - -finalize: - EOR h >> 33, h - MUL prime2, h - EOR h >> 29, h - MUL prime3, h - EOR h >> 32, h - - MOVD h, ret+24(FP) - RET - -// func writeBlocks(s *Digest, b []byte) int -TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 - LDP ·primes+0(SB), (prime1, prime2) - - // Load state. Assume v[1-4] are stored contiguously. - MOVD s+0(FP), digest - LDP 0(digest), (v1, v2) - LDP 16(digest), (v3, v4) - - LDP b_base+8(FP), (p, n) - - blockLoop() - - // Store updated state. - STP (v1, v2), 0(digest) - STP (v3, v4), 16(digest) - - BIC $31, n - MOVD n, ret+32(FP) - RET diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go deleted file mode 100644 index d4221edf4..000000000 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go +++ /dev/null @@ -1,16 +0,0 @@ -//go:build (amd64 || arm64) && !appengine && gc && !purego && !noasm -// +build amd64 arm64 -// +build !appengine -// +build gc -// +build !purego -// +build !noasm - -package xxhash - -// Sum64 computes the 64-bit xxHash digest of b. -// -//go:noescape -func Sum64(b []byte) uint64 - -//go:noescape -func writeBlocks(s *Digest, b []byte) int diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go deleted file mode 100644 index 0be16cefc..000000000 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go +++ /dev/null @@ -1,76 +0,0 @@ -//go:build (!amd64 && !arm64) || appengine || !gc || purego || noasm -// +build !amd64,!arm64 appengine !gc purego noasm - -package xxhash - -// Sum64 computes the 64-bit xxHash digest of b. -func Sum64(b []byte) uint64 { - // A simpler version would be - // d := New() - // d.Write(b) - // return d.Sum64() - // but this is faster, particularly for small inputs. - - n := len(b) - var h uint64 - - if n >= 32 { - v1 := primes[0] + prime2 - v2 := prime2 - v3 := uint64(0) - v4 := -primes[0] - for len(b) >= 32 { - v1 = round(v1, u64(b[0:8:len(b)])) - v2 = round(v2, u64(b[8:16:len(b)])) - v3 = round(v3, u64(b[16:24:len(b)])) - v4 = round(v4, u64(b[24:32:len(b)])) - b = b[32:len(b):len(b)] - } - h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) - h = mergeRound(h, v1) - h = mergeRound(h, v2) - h = mergeRound(h, v3) - h = mergeRound(h, v4) - } else { - h = prime5 - } - - h += uint64(n) - - for ; len(b) >= 8; b = b[8:] { - k1 := round(0, u64(b[:8])) - h ^= k1 - h = rol27(h)*prime1 + prime4 - } - if len(b) >= 4 { - h ^= uint64(u32(b[:4])) * prime1 - h = rol23(h)*prime2 + prime3 - b = b[4:] - } - for ; len(b) > 0; b = b[1:] { - h ^= uint64(b[0]) * prime5 - h = rol11(h) * prime1 - } - - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 - - return h -} - -func writeBlocks(d *Digest, b []byte) int { - v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4 - n := len(b) - for len(b) >= 32 { - v1 = round(v1, u64(b[0:8:len(b)])) - v2 = round(v2, u64(b[8:16:len(b)])) - v3 = round(v3, u64(b[16:24:len(b)])) - v4 = round(v4, u64(b[24:32:len(b)])) - b = b[32:len(b):len(b)] - } - d.v1, d.v2, d.v3, d.v4 = v1, v2, v3, v4 - return n - len(b) -} diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_safe.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_safe.go deleted file mode 100644 index 6f3b0cb10..000000000 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_safe.go +++ /dev/null @@ -1,11 +0,0 @@ -package xxhash - -// Sum64String computes the 64-bit xxHash digest of s. -func Sum64String(s string) uint64 { - return Sum64([]byte(s)) -} - -// WriteString adds more data to d. It always returns len(s), nil. -func (d *Digest) WriteString(s string) (n int, err error) { - return d.Write([]byte(s)) -} diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go deleted file mode 100644 index f41932b7a..000000000 --- a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go +++ /dev/null @@ -1,16 +0,0 @@ -//go:build amd64 && !appengine && !noasm && gc -// +build amd64,!appengine,!noasm,gc - -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. - -package zstd - -// matchLen returns how many bytes match in a and b -// -// It assumes that: -// -// len(a) <= len(b) and len(a) > 0 -// -//go:noescape -func matchLen(a []byte, b []byte) int diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s deleted file mode 100644 index 0782b86e3..000000000 --- a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s +++ /dev/null @@ -1,66 +0,0 @@ -// Copied from S2 implementation. - -//go:build !appengine && !noasm && gc && !noasm - -#include "textflag.h" - -// func matchLen(a []byte, b []byte) int -TEXT ·matchLen(SB), NOSPLIT, $0-56 - MOVQ a_base+0(FP), AX - MOVQ b_base+24(FP), CX - MOVQ a_len+8(FP), DX - - // matchLen - XORL SI, SI - CMPL DX, $0x08 - JB matchlen_match4_standalone - -matchlen_loopback_standalone: - MOVQ (AX)(SI*1), BX - XORQ (CX)(SI*1), BX - JZ matchlen_loop_standalone - -#ifdef GOAMD64_v3 - TZCNTQ BX, BX -#else - BSFQ BX, BX -#endif - SHRL $0x03, BX - LEAL (SI)(BX*1), SI - JMP gen_match_len_end - -matchlen_loop_standalone: - LEAL -8(DX), DX - LEAL 8(SI), SI - CMPL DX, $0x08 - JAE matchlen_loopback_standalone - -matchlen_match4_standalone: - CMPL DX, $0x04 - JB matchlen_match2_standalone - MOVL (AX)(SI*1), BX - CMPL (CX)(SI*1), BX - JNE matchlen_match2_standalone - LEAL -4(DX), DX - LEAL 4(SI), SI - -matchlen_match2_standalone: - CMPL DX, $0x02 - JB matchlen_match1_standalone - MOVW (AX)(SI*1), BX - CMPW (CX)(SI*1), BX - JNE matchlen_match1_standalone - LEAL -2(DX), DX - LEAL 2(SI), SI - -matchlen_match1_standalone: - CMPL DX, $0x01 - JB gen_match_len_end - MOVB (AX)(SI*1), BL - CMPB (CX)(SI*1), BL - JNE gen_match_len_end - INCL SI - -gen_match_len_end: - MOVQ SI, ret+48(FP) - RET diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go b/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go deleted file mode 100644 index bea1779e9..000000000 --- a/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go +++ /dev/null @@ -1,38 +0,0 @@ -//go:build !amd64 || appengine || !gc || noasm -// +build !amd64 appengine !gc noasm - -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. - -package zstd - -import ( - "math/bits" - - "github.com/klauspost/compress/internal/le" -) - -// matchLen returns the maximum common prefix length of a and b. -// a must be the shortest of the two. -func matchLen(a, b []byte) (n int) { - left := len(a) - for left >= 8 { - diff := le.Load64(a, n) ^ le.Load64(b, n) - if diff != 0 { - return n + bits.TrailingZeros64(diff)>>3 - } - n += 8 - left -= 8 - } - a = a[n:] - b = b[n:] - - for i := range a { - if a[i] != b[i] { - break - } - n++ - } - return n - -} diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go deleted file mode 100644 index 9a7de82f9..000000000 --- a/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ /dev/null @@ -1,503 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "errors" - "fmt" - "io" -) - -type seq struct { - litLen uint32 - matchLen uint32 - offset uint32 - - // Codes are stored here for the encoder - // so they only have to be looked up once. - llCode, mlCode, ofCode uint8 -} - -type seqVals struct { - ll, ml, mo int -} - -func (s seq) String() string { - if s.offset <= 3 { - if s.offset == 0 { - return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset: INVALID (0)") - } - return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset:", s.offset, " (repeat)") - } - return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset:", s.offset-3, " (new)") -} - -type seqCompMode uint8 - -const ( - compModePredefined seqCompMode = iota - compModeRLE - compModeFSE - compModeRepeat -) - -type sequenceDec struct { - // decoder keeps track of the current state and updates it from the bitstream. - fse *fseDecoder - state fseState - repeat bool -} - -// init the state of the decoder with input from stream. -func (s *sequenceDec) init(br *bitReader) error { - if s.fse == nil { - return errors.New("sequence decoder not defined") - } - s.state.init(br, s.fse.actualTableLog, s.fse.dt[:1< cap(s.out) { - addBytes := s.seqSize + len(s.out) - s.out = append(s.out, make([]byte, addBytes)...) - s.out = s.out[:len(s.out)-addBytes] - } - - if debugDecoder { - printf("Execute %d seqs with hist %d, dict %d, literals: %d into %d bytes\n", len(seqs), len(hist), len(s.dict), len(s.literals), s.seqSize) - } - - var t = len(s.out) - out := s.out[:t+s.seqSize] - - for _, seq := range seqs { - // Add literals - copy(out[t:], s.literals[:seq.ll]) - t += seq.ll - s.literals = s.literals[seq.ll:] - - // Copy from dictionary... - if seq.mo > t+len(hist) || seq.mo > s.windowSize { - if len(s.dict) == 0 { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist)) - } - - // we may be in dictionary. - dictO := len(s.dict) - (seq.mo - (t + len(hist))) - if dictO < 0 || dictO >= len(s.dict) { - return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", seq.mo, t+len(hist)+len(s.dict)) - } - end := dictO + seq.ml - if end > len(s.dict) { - n := len(s.dict) - dictO - copy(out[t:], s.dict[dictO:]) - t += n - seq.ml -= n - } else { - copy(out[t:], s.dict[dictO:end]) - t += end - dictO - continue - } - } - - // Copy from history. - if v := seq.mo - t; v > 0 { - // v is the start position in history from end. - start := len(hist) - v - if seq.ml > v { - // Some goes into current block. - // Copy remainder of history - copy(out[t:], hist[start:]) - t += v - seq.ml -= v - } else { - copy(out[t:], hist[start:start+seq.ml]) - t += seq.ml - continue - } - } - // We must be in current buffer now - if seq.ml > 0 { - start := t - seq.mo - if seq.ml <= t-start { - // No overlap - copy(out[t:], out[start:start+seq.ml]) - t += seq.ml - continue - } else { - // Overlapping copy - // Extend destination slice and copy one byte at the time. - src := out[start : start+seq.ml] - dst := out[t:] - dst = dst[:len(src)] - t += len(src) - // Destination is the space we just added. - for i := range src { - dst[i] = src[i] - } - } - } - } - - // Add final literals - copy(out[t:], s.literals) - if debugDecoder { - t += len(s.literals) - if t != len(out) { - panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) - } - } - s.out = out - - return nil -} - -// decode sequences from the stream with the provided history. -func (s *sequenceDecs) decodeSync(hist []byte) error { - supported, err := s.decodeSyncSimple(hist) - if supported { - return err - } - - br := s.br - seqs := s.nSeqs - startSize := len(s.out) - // Grab full sizes tables, to avoid bounds checks. - llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize] - llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state - out := s.out - maxBlockSize := maxCompressedBlockSize - if s.windowSize < maxBlockSize { - maxBlockSize = s.windowSize - } - - if debugDecoder { - println("decodeSync: decoding", seqs, "sequences", br.remain(), "bits remain on stream") - } - for i := seqs - 1; i >= 0; i-- { - if br.overread() { - printf("reading sequence %d, exceeded available data. Overread by %d\n", seqs-i, -br.remain()) - return io.ErrUnexpectedEOF - } - var ll, mo, ml int - if br.cursor > 4+((maxOffsetBits+16+16)>>3) { - // inlined function: - // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) - - // Final will not read from stream. - var llB, mlB, moB uint8 - ll, llB = llState.final() - ml, mlB = mlState.final() - mo, moB = ofState.final() - - // extra bits are stored in reverse order. - br.fillFast() - mo += br.getBits(moB) - if s.maxBits > 32 { - br.fillFast() - } - ml += br.getBits(mlB) - ll += br.getBits(llB) - - if moB > 1 { - s.prevOffset[2] = s.prevOffset[1] - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = mo - } else { - // mo = s.adjustOffset(mo, ll, moB) - // Inlined for rather big speedup - if ll == 0 { - // There is an exception though, when current sequence's literals_length = 0. - // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, - // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. - mo++ - } - - if mo == 0 { - mo = s.prevOffset[0] - } else { - var temp int - if mo == 3 { - temp = s.prevOffset[0] - 1 - } else { - temp = s.prevOffset[mo] - } - - if temp == 0 { - // 0 is not valid; input is corrupted; force offset to 1 - println("WARNING: temp was 0") - temp = 1 - } - - if mo != 1 { - s.prevOffset[2] = s.prevOffset[1] - } - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = temp - mo = temp - } - } - br.fillFast() - } else { - ll, mo, ml = s.next(br, llState, mlState, ofState) - br.fill() - } - - if debugSequences { - println("Seq", seqs-i-1, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml) - } - - if ll > len(s.literals) { - return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals)) - } - size := ll + ml + len(out) - if size-startSize > maxBlockSize { - return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) - } - if size > cap(out) { - // Not enough size, which can happen under high volume block streaming conditions - // but could be if destination slice is too small for sync operations. - // over-allocating here can create a large amount of GC pressure so we try to keep - // it as contained as possible - used := len(out) - startSize - addBytes := 256 + ll + ml + used>>2 - // Clamp to max block size. - if used+addBytes > maxBlockSize { - addBytes = maxBlockSize - used - } - out = append(out, make([]byte, addBytes)...) - out = out[:len(out)-addBytes] - } - if ml > maxMatchLen { - return fmt.Errorf("match len (%d) bigger than max allowed length", ml) - } - - // Add literals - out = append(out, s.literals[:ll]...) - s.literals = s.literals[ll:] - - if mo == 0 && ml > 0 { - return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) - } - - if mo > len(out)+len(hist) || mo > s.windowSize { - if len(s.dict) == 0 { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize) - } - - // we may be in dictionary. - dictO := len(s.dict) - (mo - (len(out) + len(hist))) - if dictO < 0 || dictO >= len(s.dict) { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize) - } - end := dictO + ml - if end > len(s.dict) { - out = append(out, s.dict[dictO:]...) - ml -= len(s.dict) - dictO - } else { - out = append(out, s.dict[dictO:end]...) - mo = 0 - ml = 0 - } - } - - // Copy from history. - // TODO: Blocks without history could be made to ignore this completely. - if v := mo - len(out); v > 0 { - // v is the start position in history from end. - start := len(hist) - v - if ml > v { - // Some goes into current block. - // Copy remainder of history - out = append(out, hist[start:]...) - ml -= v - } else { - out = append(out, hist[start:start+ml]...) - ml = 0 - } - } - // We must be in current buffer now - if ml > 0 { - start := len(out) - mo - if ml <= len(out)-start { - // No overlap - out = append(out, out[start:start+ml]...) - } else { - // Overlapping copy - // Extend destination slice and copy one byte at the time. - out = out[:len(out)+ml] - src := out[start : start+ml] - // Destination is the space we just added. - dst := out[len(out)-ml:] - dst = dst[:len(src)] - for i := range src { - dst[i] = src[i] - } - } - } - if i == 0 { - // This is the last sequence, so we shouldn't update state. - break - } - - // Manually inlined, ~ 5-20% faster - // Update all 3 states at once. Approx 20% faster. - nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits() - if nBits == 0 { - llState = llTable[llState.newState()&maxTableMask] - mlState = mlTable[mlState.newState()&maxTableMask] - ofState = ofTable[ofState.newState()&maxTableMask] - } else { - bits := br.get32BitsFast(nBits) - - lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31)) - llState = llTable[(llState.newState()+lowBits)&maxTableMask] - - lowBits = uint16(bits >> (ofState.nbBits() & 31)) - lowBits &= bitMask[mlState.nbBits()&15] - mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask] - - lowBits = uint16(bits) & bitMask[ofState.nbBits()&15] - ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask] - } - } - - if size := len(s.literals) + len(out) - startSize; size > maxBlockSize { - return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) - } - - // Add final literals - s.out = append(out, s.literals...) - return br.close() -} - -var bitMask [16]uint16 - -func init() { - for i := range bitMask[:] { - bitMask[i] = uint16((1 << uint(i)) - 1) - } -} - -func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) { - // Final will not read from stream. - ll, llB := llState.final() - ml, mlB := mlState.final() - mo, moB := ofState.final() - - // extra bits are stored in reverse order. - br.fill() - mo += br.getBits(moB) - if s.maxBits > 32 { - br.fill() - } - // matchlength+literal length, max 32 bits - ml += br.getBits(mlB) - ll += br.getBits(llB) - mo = s.adjustOffset(mo, ll, moB) - return -} - -func (s *sequenceDecs) adjustOffset(offset, litLen int, offsetB uint8) int { - if offsetB > 1 { - s.prevOffset[2] = s.prevOffset[1] - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = offset - return offset - } - - if litLen == 0 { - // There is an exception though, when current sequence's literals_length = 0. - // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, - // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. - offset++ - } - - if offset == 0 { - return s.prevOffset[0] - } - var temp int - if offset == 3 { - temp = s.prevOffset[0] - 1 - } else { - temp = s.prevOffset[offset] - } - - if temp == 0 { - // 0 is not valid; input is corrupted; force offset to 1 - println("temp was 0") - temp = 1 - } - - if offset != 1 { - s.prevOffset[2] = s.prevOffset[1] - } - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = temp - return temp -} diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go deleted file mode 100644 index c59f17e07..000000000 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go +++ /dev/null @@ -1,394 +0,0 @@ -//go:build amd64 && !appengine && !noasm && gc -// +build amd64,!appengine,!noasm,gc - -package zstd - -import ( - "fmt" - "io" - - "github.com/klauspost/compress/internal/cpuinfo" -) - -type decodeSyncAsmContext struct { - llTable []decSymbol - mlTable []decSymbol - ofTable []decSymbol - llState uint64 - mlState uint64 - ofState uint64 - iteration int - litRemain int - out []byte - outPosition int - literals []byte - litPosition int - history []byte - windowSize int - ll int // set on error (not for all errors, please refer to _generate/gen.go) - ml int // set on error (not for all errors, please refer to _generate/gen.go) - mo int // set on error (not for all errors, please refer to _generate/gen.go) -} - -// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm. -// -// Please refer to seqdec_generic.go for the reference implementation. -// -//go:noescape -func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int - -// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions. -// -//go:noescape -func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int - -// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer. -// -//go:noescape -func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int - -// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer. -// -//go:noescape -func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int - -// decode sequences from the stream with the provided history but without a dictionary. -func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { - if len(s.dict) > 0 { - return false, nil - } - if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize { - return false, nil - } - - // FIXME: Using unsafe memory copies leads to rare, random crashes - // with fuzz testing. It is therefore disabled for now. - const useSafe = true - /* - useSafe := false - if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc { - useSafe = true - } - if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) { - useSafe = true - } - if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc { - useSafe = true - } - */ - - br := s.br - - maxBlockSize := maxCompressedBlockSize - if s.windowSize < maxBlockSize { - maxBlockSize = s.windowSize - } - - ctx := decodeSyncAsmContext{ - llTable: s.litLengths.fse.dt[:maxTablesize], - mlTable: s.matchLengths.fse.dt[:maxTablesize], - ofTable: s.offsets.fse.dt[:maxTablesize], - llState: uint64(s.litLengths.state.state), - mlState: uint64(s.matchLengths.state.state), - ofState: uint64(s.offsets.state.state), - iteration: s.nSeqs - 1, - litRemain: len(s.literals), - out: s.out, - outPosition: len(s.out), - literals: s.literals, - windowSize: s.windowSize, - history: hist, - } - - s.seqSize = 0 - startSize := len(s.out) - - var errCode int - if cpuinfo.HasBMI2() { - if useSafe { - errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx) - } else { - errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx) - } - } else { - if useSafe { - errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx) - } else { - errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx) - } - } - switch errCode { - case noError: - break - - case errorMatchLenOfsMismatch: - return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml) - - case errorMatchLenTooBig: - return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml) - - case errorMatchOffTooBig: - return true, fmt.Errorf("match offset (%d) bigger than current history (%d)", - ctx.mo, ctx.outPosition+len(hist)-startSize) - - case errorNotEnoughLiterals: - return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", - ctx.ll, ctx.litRemain+ctx.ll) - - case errorOverread: - return true, io.ErrUnexpectedEOF - - case errorNotEnoughSpace: - size := ctx.outPosition + ctx.ll + ctx.ml - if debugDecoder { - println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize) - } - return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) - - default: - return true, fmt.Errorf("sequenceDecs_decode returned erroneous code %d", errCode) - } - - s.seqSize += ctx.litRemain - if s.seqSize > maxBlockSize { - return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) - } - err := br.close() - if err != nil { - printf("Closing sequences: %v, %+v\n", err, *br) - return true, err - } - - s.literals = s.literals[ctx.litPosition:] - t := ctx.outPosition - s.out = s.out[:t] - - // Add final literals - s.out = append(s.out, s.literals...) - if debugDecoder { - t += len(s.literals) - if t != len(s.out) { - panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t)) - } - } - - return true, nil -} - -// -------------------------------------------------------------------------------- - -type decodeAsmContext struct { - llTable []decSymbol - mlTable []decSymbol - ofTable []decSymbol - llState uint64 - mlState uint64 - ofState uint64 - iteration int - seqs []seqVals - litRemain int -} - -const noError = 0 - -// error reported when mo == 0 && ml > 0 -const errorMatchLenOfsMismatch = 1 - -// error reported when ml > maxMatchLen -const errorMatchLenTooBig = 2 - -// error reported when mo > available history or mo > s.windowSize -const errorMatchOffTooBig = 3 - -// error reported when the sum of literal lengths exeeceds the literal buffer size -const errorNotEnoughLiterals = 4 - -// error reported when capacity of `out` is too small -const errorNotEnoughSpace = 5 - -// error reported when bits are overread. -const errorOverread = 6 - -// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm. -// -// Please refer to seqdec_generic.go for the reference implementation. -// -//go:noescape -func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int - -// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm. -// -// Please refer to seqdec_generic.go for the reference implementation. -// -//go:noescape -func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int - -// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions. -// -//go:noescape -func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int - -// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions. -// -//go:noescape -func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int - -// decode sequences from the stream without the provided history. -func (s *sequenceDecs) decode(seqs []seqVals) error { - br := s.br - - maxBlockSize := maxCompressedBlockSize - if s.windowSize < maxBlockSize { - maxBlockSize = s.windowSize - } - - ctx := decodeAsmContext{ - llTable: s.litLengths.fse.dt[:maxTablesize], - mlTable: s.matchLengths.fse.dt[:maxTablesize], - ofTable: s.offsets.fse.dt[:maxTablesize], - llState: uint64(s.litLengths.state.state), - mlState: uint64(s.matchLengths.state.state), - ofState: uint64(s.offsets.state.state), - seqs: seqs, - iteration: len(seqs) - 1, - litRemain: len(s.literals), - } - - if debugDecoder { - println("decode: decoding", len(seqs), "sequences", br.remain(), "bits remain on stream") - } - - s.seqSize = 0 - lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56 - var errCode int - if cpuinfo.HasBMI2() { - if lte56bits { - errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx) - } else { - errCode = sequenceDecs_decode_bmi2(s, br, &ctx) - } - } else { - if lte56bits { - errCode = sequenceDecs_decode_56_amd64(s, br, &ctx) - } else { - errCode = sequenceDecs_decode_amd64(s, br, &ctx) - } - } - if errCode != 0 { - i := len(seqs) - ctx.iteration - 1 - switch errCode { - case errorMatchLenOfsMismatch: - ml := ctx.seqs[i].ml - return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) - - case errorMatchLenTooBig: - ml := ctx.seqs[i].ml - return fmt.Errorf("match len (%d) bigger than max allowed length", ml) - - case errorNotEnoughLiterals: - ll := ctx.seqs[i].ll - return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll) - case errorOverread: - return io.ErrUnexpectedEOF - } - - return fmt.Errorf("sequenceDecs_decode_amd64 returned erroneous code %d", errCode) - } - - if ctx.litRemain < 0 { - return fmt.Errorf("literal count is too big: total available %d, total requested %d", - len(s.literals), len(s.literals)-ctx.litRemain) - } - - s.seqSize += ctx.litRemain - if s.seqSize > maxBlockSize { - return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) - } - if debugDecoder { - println("decode: ", br.remain(), "bits remain on stream. code:", errCode) - } - err := br.close() - if err != nil { - printf("Closing sequences: %v, %+v\n", err, *br) - } - return err -} - -// -------------------------------------------------------------------------------- - -type executeAsmContext struct { - seqs []seqVals - seqIndex int - out []byte - history []byte - literals []byte - outPosition int - litPosition int - windowSize int -} - -// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm. -// -// Returns false if a match offset is too big. -// -// Please refer to seqdec_generic.go for the reference implementation. -// -//go:noescape -func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool - -// Same as above, but with safe memcopies -// -//go:noescape -func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool - -// executeSimple handles cases when dictionary is not used. -func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error { - // Ensure we have enough output size... - if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) { - addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc - s.out = append(s.out, make([]byte, addBytes)...) - s.out = s.out[:len(s.out)-addBytes] - } - - if debugDecoder { - printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize) - } - - var t = len(s.out) - out := s.out[:t+s.seqSize] - - ctx := executeAsmContext{ - seqs: seqs, - seqIndex: 0, - out: out, - history: hist, - outPosition: t, - litPosition: 0, - literals: s.literals, - windowSize: s.windowSize, - } - var ok bool - if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc { - ok = sequenceDecs_executeSimple_safe_amd64(&ctx) - } else { - ok = sequenceDecs_executeSimple_amd64(&ctx) - } - if !ok { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", - seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist)) - } - s.literals = s.literals[ctx.litPosition:] - t = ctx.outPosition - - // Add final literals - copy(out[t:], s.literals) - if debugDecoder { - t += len(s.literals) - if t != len(out) { - panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) - } - } - s.out = out - - return nil -} diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s deleted file mode 100644 index a708ca6d3..000000000 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s +++ /dev/null @@ -1,4151 +0,0 @@ -// Code generated by command: go run gen.go -out ../seqdec_amd64.s -pkg=zstd. DO NOT EDIT. - -//go:build !appengine && !noasm && gc && !noasm - -// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int -// Requires: CMOV -TEXT ·sequenceDecs_decode_amd64(SB), $8-32 - MOVQ br+8(FP), CX - MOVQ 24(CX), DX - MOVBQZX 40(CX), BX - MOVQ (CX), AX - MOVQ 32(CX), SI - ADDQ SI, AX - MOVQ AX, (SP) - MOVQ ctx+16(FP), AX - MOVQ 72(AX), DI - MOVQ 80(AX), R8 - MOVQ 88(AX), R9 - MOVQ 104(AX), R10 - MOVQ s+0(FP), AX - MOVQ 144(AX), R11 - MOVQ 152(AX), R12 - MOVQ 160(AX), R13 - -sequenceDecs_decode_amd64_main_loop: - MOVQ (SP), R14 - - // Fill bitreader to have enough for the offset and match length. - CMPQ SI, $0x08 - JL sequenceDecs_decode_amd64_fill_byte_by_byte - MOVQ BX, AX - SHRQ $0x03, AX - SUBQ AX, R14 - MOVQ (R14), DX - SUBQ AX, SI - ANDQ $0x07, BX - JMP sequenceDecs_decode_amd64_fill_end - -sequenceDecs_decode_amd64_fill_byte_by_byte: - CMPQ SI, $0x00 - JLE sequenceDecs_decode_amd64_fill_check_overread - CMPQ BX, $0x07 - JLE sequenceDecs_decode_amd64_fill_end - SHLQ $0x08, DX - SUBQ $0x01, R14 - SUBQ $0x01, SI - SUBQ $0x08, BX - MOVBQZX (R14), AX - ORQ AX, DX - JMP sequenceDecs_decode_amd64_fill_byte_by_byte - -sequenceDecs_decode_amd64_fill_check_overread: - CMPQ BX, $0x40 - JA error_overread - -sequenceDecs_decode_amd64_fill_end: - // Update offset - MOVQ R9, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - SHRQ $0x20, AX - TESTQ CX, CX - JZ sequenceDecs_decode_amd64_of_update_zero - ADDQ CX, BX - CMPQ BX, $0x40 - JA sequenceDecs_decode_amd64_of_update_zero - CMPQ CX, $0x40 - JAE sequenceDecs_decode_amd64_of_update_zero - NEGQ CX - SHRQ CL, R15 - ADDQ R15, AX - -sequenceDecs_decode_amd64_of_update_zero: - MOVQ AX, 16(R10) - - // Update match length - MOVQ R8, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - SHRQ $0x20, AX - TESTQ CX, CX - JZ sequenceDecs_decode_amd64_ml_update_zero - ADDQ CX, BX - CMPQ BX, $0x40 - JA sequenceDecs_decode_amd64_ml_update_zero - CMPQ CX, $0x40 - JAE sequenceDecs_decode_amd64_ml_update_zero - NEGQ CX - SHRQ CL, R15 - ADDQ R15, AX - -sequenceDecs_decode_amd64_ml_update_zero: - MOVQ AX, 8(R10) - - // Fill bitreader to have enough for the remaining - CMPQ SI, $0x08 - JL sequenceDecs_decode_amd64_fill_2_byte_by_byte - MOVQ BX, AX - SHRQ $0x03, AX - SUBQ AX, R14 - MOVQ (R14), DX - SUBQ AX, SI - ANDQ $0x07, BX - JMP sequenceDecs_decode_amd64_fill_2_end - -sequenceDecs_decode_amd64_fill_2_byte_by_byte: - CMPQ SI, $0x00 - JLE sequenceDecs_decode_amd64_fill_2_check_overread - CMPQ BX, $0x07 - JLE sequenceDecs_decode_amd64_fill_2_end - SHLQ $0x08, DX - SUBQ $0x01, R14 - SUBQ $0x01, SI - SUBQ $0x08, BX - MOVBQZX (R14), AX - ORQ AX, DX - JMP sequenceDecs_decode_amd64_fill_2_byte_by_byte - -sequenceDecs_decode_amd64_fill_2_check_overread: - CMPQ BX, $0x40 - JA error_overread - -sequenceDecs_decode_amd64_fill_2_end: - // Update literal length - MOVQ DI, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - SHRQ $0x20, AX - TESTQ CX, CX - JZ sequenceDecs_decode_amd64_ll_update_zero - ADDQ CX, BX - CMPQ BX, $0x40 - JA sequenceDecs_decode_amd64_ll_update_zero - CMPQ CX, $0x40 - JAE sequenceDecs_decode_amd64_ll_update_zero - NEGQ CX - SHRQ CL, R15 - ADDQ R15, AX - -sequenceDecs_decode_amd64_ll_update_zero: - MOVQ AX, (R10) - - // Fill bitreader for state updates - MOVQ R14, (SP) - MOVQ R9, AX - SHRQ $0x08, AX - MOVBQZX AL, AX - MOVQ ctx+16(FP), CX - CMPQ 96(CX), $0x00 - JZ sequenceDecs_decode_amd64_skip_update - - // Update Literal Length State - MOVBQZX DI, R14 - SHRL $0x10, DI - LEAQ (BX)(R14*1), CX - MOVQ DX, R15 - MOVQ CX, BX - ROLQ CL, R15 - MOVL $0x00000001, BP - MOVB R14, CL - SHLL CL, BP - DECL BP - ANDQ BP, R15 - ADDQ R15, DI - - // Load ctx.llTable - MOVQ ctx+16(FP), CX - MOVQ (CX), CX - MOVQ (CX)(DI*8), DI - - // Update Match Length State - MOVBQZX R8, R14 - SHRL $0x10, R8 - LEAQ (BX)(R14*1), CX - MOVQ DX, R15 - MOVQ CX, BX - ROLQ CL, R15 - MOVL $0x00000001, BP - MOVB R14, CL - SHLL CL, BP - DECL BP - ANDQ BP, R15 - ADDQ R15, R8 - - // Load ctx.mlTable - MOVQ ctx+16(FP), CX - MOVQ 24(CX), CX - MOVQ (CX)(R8*8), R8 - - // Update Offset State - MOVBQZX R9, R14 - SHRL $0x10, R9 - LEAQ (BX)(R14*1), CX - MOVQ DX, R15 - MOVQ CX, BX - ROLQ CL, R15 - MOVL $0x00000001, BP - MOVB R14, CL - SHLL CL, BP - DECL BP - ANDQ BP, R15 - ADDQ R15, R9 - - // Load ctx.ofTable - MOVQ ctx+16(FP), CX - MOVQ 48(CX), CX - MOVQ (CX)(R9*8), R9 - -sequenceDecs_decode_amd64_skip_update: - // Adjust offset - MOVQ 16(R10), CX - CMPQ AX, $0x01 - JBE sequenceDecs_decode_amd64_adjust_offsetB_1_or_0 - MOVQ R12, R13 - MOVQ R11, R12 - MOVQ CX, R11 - JMP sequenceDecs_decode_amd64_after_adjust - -sequenceDecs_decode_amd64_adjust_offsetB_1_or_0: - CMPQ (R10), $0x00000000 - JNE sequenceDecs_decode_amd64_adjust_offset_maybezero - INCQ CX - JMP sequenceDecs_decode_amd64_adjust_offset_nonzero - -sequenceDecs_decode_amd64_adjust_offset_maybezero: - TESTQ CX, CX - JNZ sequenceDecs_decode_amd64_adjust_offset_nonzero - MOVQ R11, CX - JMP sequenceDecs_decode_amd64_after_adjust - -sequenceDecs_decode_amd64_adjust_offset_nonzero: - CMPQ CX, $0x01 - JB sequenceDecs_decode_amd64_adjust_zero - JEQ sequenceDecs_decode_amd64_adjust_one - CMPQ CX, $0x02 - JA sequenceDecs_decode_amd64_adjust_three - JMP sequenceDecs_decode_amd64_adjust_two - -sequenceDecs_decode_amd64_adjust_zero: - MOVQ R11, AX - JMP sequenceDecs_decode_amd64_adjust_test_temp_valid - -sequenceDecs_decode_amd64_adjust_one: - MOVQ R12, AX - JMP sequenceDecs_decode_amd64_adjust_test_temp_valid - -sequenceDecs_decode_amd64_adjust_two: - MOVQ R13, AX - JMP sequenceDecs_decode_amd64_adjust_test_temp_valid - -sequenceDecs_decode_amd64_adjust_three: - LEAQ -1(R11), AX - -sequenceDecs_decode_amd64_adjust_test_temp_valid: - TESTQ AX, AX - JNZ sequenceDecs_decode_amd64_adjust_temp_valid - MOVQ $0x00000001, AX - -sequenceDecs_decode_amd64_adjust_temp_valid: - CMPQ CX, $0x01 - CMOVQNE R12, R13 - MOVQ R11, R12 - MOVQ AX, R11 - MOVQ AX, CX - -sequenceDecs_decode_amd64_after_adjust: - MOVQ CX, 16(R10) - - // Check values - MOVQ 8(R10), AX - MOVQ (R10), R14 - LEAQ (AX)(R14*1), R15 - MOVQ s+0(FP), BP - ADDQ R15, 256(BP) - MOVQ ctx+16(FP), R15 - SUBQ R14, 128(R15) - JS error_not_enough_literals - CMPQ AX, $0x00020002 - JA sequenceDecs_decode_amd64_error_match_len_too_big - TESTQ CX, CX - JNZ sequenceDecs_decode_amd64_match_len_ofs_ok - TESTQ AX, AX - JNZ sequenceDecs_decode_amd64_error_match_len_ofs_mismatch - -sequenceDecs_decode_amd64_match_len_ofs_ok: - ADDQ $0x18, R10 - MOVQ ctx+16(FP), AX - DECQ 96(AX) - JNS sequenceDecs_decode_amd64_main_loop - MOVQ s+0(FP), AX - MOVQ R11, 144(AX) - MOVQ R12, 152(AX) - MOVQ R13, 160(AX) - MOVQ br+8(FP), AX - MOVQ DX, 24(AX) - MOVB BL, 40(AX) - MOVQ SI, 32(AX) - - // Return success - MOVQ $0x00000000, ret+24(FP) - RET - - // Return with match length error -sequenceDecs_decode_amd64_error_match_len_ofs_mismatch: - MOVQ $0x00000001, ret+24(FP) - RET - - // Return with match too long error -sequenceDecs_decode_amd64_error_match_len_too_big: - MOVQ $0x00000002, ret+24(FP) - RET - - // Return with match offset too long error - MOVQ $0x00000003, ret+24(FP) - RET - - // Return with not enough literals error -error_not_enough_literals: - MOVQ $0x00000004, ret+24(FP) - RET - - // Return with overread error -error_overread: - MOVQ $0x00000006, ret+24(FP) - RET - -// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int -// Requires: CMOV -TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 - MOVQ br+8(FP), CX - MOVQ 24(CX), DX - MOVBQZX 40(CX), BX - MOVQ (CX), AX - MOVQ 32(CX), SI - ADDQ SI, AX - MOVQ AX, (SP) - MOVQ ctx+16(FP), AX - MOVQ 72(AX), DI - MOVQ 80(AX), R8 - MOVQ 88(AX), R9 - MOVQ 104(AX), R10 - MOVQ s+0(FP), AX - MOVQ 144(AX), R11 - MOVQ 152(AX), R12 - MOVQ 160(AX), R13 - -sequenceDecs_decode_56_amd64_main_loop: - MOVQ (SP), R14 - - // Fill bitreader to have enough for the offset and match length. - CMPQ SI, $0x08 - JL sequenceDecs_decode_56_amd64_fill_byte_by_byte - MOVQ BX, AX - SHRQ $0x03, AX - SUBQ AX, R14 - MOVQ (R14), DX - SUBQ AX, SI - ANDQ $0x07, BX - JMP sequenceDecs_decode_56_amd64_fill_end - -sequenceDecs_decode_56_amd64_fill_byte_by_byte: - CMPQ SI, $0x00 - JLE sequenceDecs_decode_56_amd64_fill_check_overread - CMPQ BX, $0x07 - JLE sequenceDecs_decode_56_amd64_fill_end - SHLQ $0x08, DX - SUBQ $0x01, R14 - SUBQ $0x01, SI - SUBQ $0x08, BX - MOVBQZX (R14), AX - ORQ AX, DX - JMP sequenceDecs_decode_56_amd64_fill_byte_by_byte - -sequenceDecs_decode_56_amd64_fill_check_overread: - CMPQ BX, $0x40 - JA error_overread - -sequenceDecs_decode_56_amd64_fill_end: - // Update offset - MOVQ R9, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - SHRQ $0x20, AX - TESTQ CX, CX - JZ sequenceDecs_decode_56_amd64_of_update_zero - ADDQ CX, BX - CMPQ BX, $0x40 - JA sequenceDecs_decode_56_amd64_of_update_zero - CMPQ CX, $0x40 - JAE sequenceDecs_decode_56_amd64_of_update_zero - NEGQ CX - SHRQ CL, R15 - ADDQ R15, AX - -sequenceDecs_decode_56_amd64_of_update_zero: - MOVQ AX, 16(R10) - - // Update match length - MOVQ R8, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - SHRQ $0x20, AX - TESTQ CX, CX - JZ sequenceDecs_decode_56_amd64_ml_update_zero - ADDQ CX, BX - CMPQ BX, $0x40 - JA sequenceDecs_decode_56_amd64_ml_update_zero - CMPQ CX, $0x40 - JAE sequenceDecs_decode_56_amd64_ml_update_zero - NEGQ CX - SHRQ CL, R15 - ADDQ R15, AX - -sequenceDecs_decode_56_amd64_ml_update_zero: - MOVQ AX, 8(R10) - - // Update literal length - MOVQ DI, AX - MOVQ BX, CX - MOVQ DX, R15 - SHLQ CL, R15 - MOVB AH, CL - SHRQ $0x20, AX - TESTQ CX, CX - JZ sequenceDecs_decode_56_amd64_ll_update_zero - ADDQ CX, BX - CMPQ BX, $0x40 - JA sequenceDecs_decode_56_amd64_ll_update_zero - CMPQ CX, $0x40 - JAE sequenceDecs_decode_56_amd64_ll_update_zero - NEGQ CX - SHRQ CL, R15 - ADDQ R15, AX - -sequenceDecs_decode_56_amd64_ll_update_zero: - MOVQ AX, (R10) - - // Fill bitreader for state updates - MOVQ R14, (SP) - MOVQ R9, AX - SHRQ $0x08, AX - MOVBQZX AL, AX - MOVQ ctx+16(FP), CX - CMPQ 96(CX), $0x00 - JZ sequenceDecs_decode_56_amd64_skip_update - - // Update Literal Length State - MOVBQZX DI, R14 - SHRL $0x10, DI - LEAQ (BX)(R14*1), CX - MOVQ DX, R15 - MOVQ CX, BX - ROLQ CL, R15 - MOVL $0x00000001, BP - MOVB R14, CL - SHLL CL, BP - DECL BP - ANDQ BP, R15 - ADDQ R15, DI - - // Load ctx.llTable - MOVQ ctx+16(FP), CX - MOVQ (CX), CX - MOVQ (CX)(DI*8), DI - - // Update Match Length State - MOVBQZX R8, R14 - SHRL $0x10, R8 - LEAQ (BX)(R14*1), CX - MOVQ DX, R15 - MOVQ CX, BX - ROLQ CL, R15 - MOVL $0x00000001, BP - MOVB R14, CL - SHLL CL, BP - DECL BP - ANDQ BP, R15 - ADDQ R15, R8 - - // Load ctx.mlTable - MOVQ ctx+16(FP), CX - MOVQ 24(CX), CX - MOVQ (CX)(R8*8), R8 - - // Update Offset State - MOVBQZX R9, R14 - SHRL $0x10, R9 - LEAQ (BX)(R14*1), CX - MOVQ DX, R15 - MOVQ CX, BX - ROLQ CL, R15 - MOVL $0x00000001, BP - MOVB R14, CL - SHLL CL, BP - DECL BP - ANDQ BP, R15 - ADDQ R15, R9 - - // Load ctx.ofTable - MOVQ ctx+16(FP), CX - MOVQ 48(CX), CX - MOVQ (CX)(R9*8), R9 - -sequenceDecs_decode_56_amd64_skip_update: - // Adjust offset - MOVQ 16(R10), CX - CMPQ AX, $0x01 - JBE sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0 - MOVQ R12, R13 - MOVQ R11, R12 - MOVQ CX, R11 - JMP sequenceDecs_decode_56_amd64_after_adjust - -sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0: - CMPQ (R10), $0x00000000 - JNE sequenceDecs_decode_56_amd64_adjust_offset_maybezero - INCQ CX - JMP sequenceDecs_decode_56_amd64_adjust_offset_nonzero - -sequenceDecs_decode_56_amd64_adjust_offset_maybezero: - TESTQ CX, CX - JNZ sequenceDecs_decode_56_amd64_adjust_offset_nonzero - MOVQ R11, CX - JMP sequenceDecs_decode_56_amd64_after_adjust - -sequenceDecs_decode_56_amd64_adjust_offset_nonzero: - CMPQ CX, $0x01 - JB sequenceDecs_decode_56_amd64_adjust_zero - JEQ sequenceDecs_decode_56_amd64_adjust_one - CMPQ CX, $0x02 - JA sequenceDecs_decode_56_amd64_adjust_three - JMP sequenceDecs_decode_56_amd64_adjust_two - -sequenceDecs_decode_56_amd64_adjust_zero: - MOVQ R11, AX - JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid - -sequenceDecs_decode_56_amd64_adjust_one: - MOVQ R12, AX - JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid - -sequenceDecs_decode_56_amd64_adjust_two: - MOVQ R13, AX - JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid - -sequenceDecs_decode_56_amd64_adjust_three: - LEAQ -1(R11), AX - -sequenceDecs_decode_56_amd64_adjust_test_temp_valid: - TESTQ AX, AX - JNZ sequenceDecs_decode_56_amd64_adjust_temp_valid - MOVQ $0x00000001, AX - -sequenceDecs_decode_56_amd64_adjust_temp_valid: - CMPQ CX, $0x01 - CMOVQNE R12, R13 - MOVQ R11, R12 - MOVQ AX, R11 - MOVQ AX, CX - -sequenceDecs_decode_56_amd64_after_adjust: - MOVQ CX, 16(R10) - - // Check values - MOVQ 8(R10), AX - MOVQ (R10), R14 - LEAQ (AX)(R14*1), R15 - MOVQ s+0(FP), BP - ADDQ R15, 256(BP) - MOVQ ctx+16(FP), R15 - SUBQ R14, 128(R15) - JS error_not_enough_literals - CMPQ AX, $0x00020002 - JA sequenceDecs_decode_56_amd64_error_match_len_too_big - TESTQ CX, CX - JNZ sequenceDecs_decode_56_amd64_match_len_ofs_ok - TESTQ AX, AX - JNZ sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch - -sequenceDecs_decode_56_amd64_match_len_ofs_ok: - ADDQ $0x18, R10 - MOVQ ctx+16(FP), AX - DECQ 96(AX) - JNS sequenceDecs_decode_56_amd64_main_loop - MOVQ s+0(FP), AX - MOVQ R11, 144(AX) - MOVQ R12, 152(AX) - MOVQ R13, 160(AX) - MOVQ br+8(FP), AX - MOVQ DX, 24(AX) - MOVB BL, 40(AX) - MOVQ SI, 32(AX) - - // Return success - MOVQ $0x00000000, ret+24(FP) - RET - - // Return with match length error -sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch: - MOVQ $0x00000001, ret+24(FP) - RET - - // Return with match too long error -sequenceDecs_decode_56_amd64_error_match_len_too_big: - MOVQ $0x00000002, ret+24(FP) - RET - - // Return with match offset too long error - MOVQ $0x00000003, ret+24(FP) - RET - - // Return with not enough literals error -error_not_enough_literals: - MOVQ $0x00000004, ret+24(FP) - RET - - // Return with overread error -error_overread: - MOVQ $0x00000006, ret+24(FP) - RET - -// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int -// Requires: BMI, BMI2, CMOV -TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 - MOVQ br+8(FP), BX - MOVQ 24(BX), AX - MOVBQZX 40(BX), DX - MOVQ (BX), CX - MOVQ 32(BX), BX - ADDQ BX, CX - MOVQ CX, (SP) - MOVQ ctx+16(FP), CX - MOVQ 72(CX), SI - MOVQ 80(CX), DI - MOVQ 88(CX), R8 - MOVQ 104(CX), R9 - MOVQ s+0(FP), CX - MOVQ 144(CX), R10 - MOVQ 152(CX), R11 - MOVQ 160(CX), R12 - -sequenceDecs_decode_bmi2_main_loop: - MOVQ (SP), R13 - - // Fill bitreader to have enough for the offset and match length. - CMPQ BX, $0x08 - JL sequenceDecs_decode_bmi2_fill_byte_by_byte - MOVQ DX, CX - SHRQ $0x03, CX - SUBQ CX, R13 - MOVQ (R13), AX - SUBQ CX, BX - ANDQ $0x07, DX - JMP sequenceDecs_decode_bmi2_fill_end - -sequenceDecs_decode_bmi2_fill_byte_by_byte: - CMPQ BX, $0x00 - JLE sequenceDecs_decode_bmi2_fill_check_overread - CMPQ DX, $0x07 - JLE sequenceDecs_decode_bmi2_fill_end - SHLQ $0x08, AX - SUBQ $0x01, R13 - SUBQ $0x01, BX - SUBQ $0x08, DX - MOVBQZX (R13), CX - ORQ CX, AX - JMP sequenceDecs_decode_bmi2_fill_byte_by_byte - -sequenceDecs_decode_bmi2_fill_check_overread: - CMPQ DX, $0x40 - JA error_overread - -sequenceDecs_decode_bmi2_fill_end: - // Update offset - MOVQ $0x00000808, CX - BEXTRQ CX, R8, R14 - MOVQ AX, R15 - LEAQ (DX)(R14*1), CX - ROLQ CL, R15 - BZHIQ R14, R15, R15 - MOVQ CX, DX - MOVQ R8, CX - SHRQ $0x20, CX - ADDQ R15, CX - MOVQ CX, 16(R9) - - // Update match length - MOVQ $0x00000808, CX - BEXTRQ CX, DI, R14 - MOVQ AX, R15 - LEAQ (DX)(R14*1), CX - ROLQ CL, R15 - BZHIQ R14, R15, R15 - MOVQ CX, DX - MOVQ DI, CX - SHRQ $0x20, CX - ADDQ R15, CX - MOVQ CX, 8(R9) - - // Fill bitreader to have enough for the remaining - CMPQ BX, $0x08 - JL sequenceDecs_decode_bmi2_fill_2_byte_by_byte - MOVQ DX, CX - SHRQ $0x03, CX - SUBQ CX, R13 - MOVQ (R13), AX - SUBQ CX, BX - ANDQ $0x07, DX - JMP sequenceDecs_decode_bmi2_fill_2_end - -sequenceDecs_decode_bmi2_fill_2_byte_by_byte: - CMPQ BX, $0x00 - JLE sequenceDecs_decode_bmi2_fill_2_check_overread - CMPQ DX, $0x07 - JLE sequenceDecs_decode_bmi2_fill_2_end - SHLQ $0x08, AX - SUBQ $0x01, R13 - SUBQ $0x01, BX - SUBQ $0x08, DX - MOVBQZX (R13), CX - ORQ CX, AX - JMP sequenceDecs_decode_bmi2_fill_2_byte_by_byte - -sequenceDecs_decode_bmi2_fill_2_check_overread: - CMPQ DX, $0x40 - JA error_overread - -sequenceDecs_decode_bmi2_fill_2_end: - // Update literal length - MOVQ $0x00000808, CX - BEXTRQ CX, SI, R14 - MOVQ AX, R15 - LEAQ (DX)(R14*1), CX - ROLQ CL, R15 - BZHIQ R14, R15, R15 - MOVQ CX, DX - MOVQ SI, CX - SHRQ $0x20, CX - ADDQ R15, CX - MOVQ CX, (R9) - - // Fill bitreader for state updates - MOVQ R13, (SP) - MOVQ $0x00000808, CX - BEXTRQ CX, R8, R13 - MOVQ ctx+16(FP), CX - CMPQ 96(CX), $0x00 - JZ sequenceDecs_decode_bmi2_skip_update - LEAQ (SI)(DI*1), R14 - ADDQ R8, R14 - MOVBQZX R14, R14 - LEAQ (DX)(R14*1), CX - MOVQ AX, R15 - MOVQ CX, DX - ROLQ CL, R15 - BZHIQ R14, R15, R15 - - // Update Offset State - BZHIQ R8, R15, CX - SHRXQ R8, R15, R15 - SHRL $0x10, R8 - ADDQ CX, R8 - - // Load ctx.ofTable - MOVQ ctx+16(FP), CX - MOVQ 48(CX), CX - MOVQ (CX)(R8*8), R8 - - // Update Match Length State - BZHIQ DI, R15, CX - SHRXQ DI, R15, R15 - SHRL $0x10, DI - ADDQ CX, DI - - // Load ctx.mlTable - MOVQ ctx+16(FP), CX - MOVQ 24(CX), CX - MOVQ (CX)(DI*8), DI - - // Update Literal Length State - BZHIQ SI, R15, CX - SHRL $0x10, SI - ADDQ CX, SI - - // Load ctx.llTable - MOVQ ctx+16(FP), CX - MOVQ (CX), CX - MOVQ (CX)(SI*8), SI - -sequenceDecs_decode_bmi2_skip_update: - // Adjust offset - MOVQ 16(R9), CX - CMPQ R13, $0x01 - JBE sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0 - MOVQ R11, R12 - MOVQ R10, R11 - MOVQ CX, R10 - JMP sequenceDecs_decode_bmi2_after_adjust - -sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0: - CMPQ (R9), $0x00000000 - JNE sequenceDecs_decode_bmi2_adjust_offset_maybezero - INCQ CX - JMP sequenceDecs_decode_bmi2_adjust_offset_nonzero - -sequenceDecs_decode_bmi2_adjust_offset_maybezero: - TESTQ CX, CX - JNZ sequenceDecs_decode_bmi2_adjust_offset_nonzero - MOVQ R10, CX - JMP sequenceDecs_decode_bmi2_after_adjust - -sequenceDecs_decode_bmi2_adjust_offset_nonzero: - CMPQ CX, $0x01 - JB sequenceDecs_decode_bmi2_adjust_zero - JEQ sequenceDecs_decode_bmi2_adjust_one - CMPQ CX, $0x02 - JA sequenceDecs_decode_bmi2_adjust_three - JMP sequenceDecs_decode_bmi2_adjust_two - -sequenceDecs_decode_bmi2_adjust_zero: - MOVQ R10, R13 - JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid - -sequenceDecs_decode_bmi2_adjust_one: - MOVQ R11, R13 - JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid - -sequenceDecs_decode_bmi2_adjust_two: - MOVQ R12, R13 - JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid - -sequenceDecs_decode_bmi2_adjust_three: - LEAQ -1(R10), R13 - -sequenceDecs_decode_bmi2_adjust_test_temp_valid: - TESTQ R13, R13 - JNZ sequenceDecs_decode_bmi2_adjust_temp_valid - MOVQ $0x00000001, R13 - -sequenceDecs_decode_bmi2_adjust_temp_valid: - CMPQ CX, $0x01 - CMOVQNE R11, R12 - MOVQ R10, R11 - MOVQ R13, R10 - MOVQ R13, CX - -sequenceDecs_decode_bmi2_after_adjust: - MOVQ CX, 16(R9) - - // Check values - MOVQ 8(R9), R13 - MOVQ (R9), R14 - LEAQ (R13)(R14*1), R15 - MOVQ s+0(FP), BP - ADDQ R15, 256(BP) - MOVQ ctx+16(FP), R15 - SUBQ R14, 128(R15) - JS error_not_enough_literals - CMPQ R13, $0x00020002 - JA sequenceDecs_decode_bmi2_error_match_len_too_big - TESTQ CX, CX - JNZ sequenceDecs_decode_bmi2_match_len_ofs_ok - TESTQ R13, R13 - JNZ sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch - -sequenceDecs_decode_bmi2_match_len_ofs_ok: - ADDQ $0x18, R9 - MOVQ ctx+16(FP), CX - DECQ 96(CX) - JNS sequenceDecs_decode_bmi2_main_loop - MOVQ s+0(FP), CX - MOVQ R10, 144(CX) - MOVQ R11, 152(CX) - MOVQ R12, 160(CX) - MOVQ br+8(FP), CX - MOVQ AX, 24(CX) - MOVB DL, 40(CX) - MOVQ BX, 32(CX) - - // Return success - MOVQ $0x00000000, ret+24(FP) - RET - - // Return with match length error -sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch: - MOVQ $0x00000001, ret+24(FP) - RET - - // Return with match too long error -sequenceDecs_decode_bmi2_error_match_len_too_big: - MOVQ $0x00000002, ret+24(FP) - RET - - // Return with match offset too long error - MOVQ $0x00000003, ret+24(FP) - RET - - // Return with not enough literals error -error_not_enough_literals: - MOVQ $0x00000004, ret+24(FP) - RET - - // Return with overread error -error_overread: - MOVQ $0x00000006, ret+24(FP) - RET - -// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int -// Requires: BMI, BMI2, CMOV -TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 - MOVQ br+8(FP), BX - MOVQ 24(BX), AX - MOVBQZX 40(BX), DX - MOVQ (BX), CX - MOVQ 32(BX), BX - ADDQ BX, CX - MOVQ CX, (SP) - MOVQ ctx+16(FP), CX - MOVQ 72(CX), SI - MOVQ 80(CX), DI - MOVQ 88(CX), R8 - MOVQ 104(CX), R9 - MOVQ s+0(FP), CX - MOVQ 144(CX), R10 - MOVQ 152(CX), R11 - MOVQ 160(CX), R12 - -sequenceDecs_decode_56_bmi2_main_loop: - MOVQ (SP), R13 - - // Fill bitreader to have enough for the offset and match length. - CMPQ BX, $0x08 - JL sequenceDecs_decode_56_bmi2_fill_byte_by_byte - MOVQ DX, CX - SHRQ $0x03, CX - SUBQ CX, R13 - MOVQ (R13), AX - SUBQ CX, BX - ANDQ $0x07, DX - JMP sequenceDecs_decode_56_bmi2_fill_end - -sequenceDecs_decode_56_bmi2_fill_byte_by_byte: - CMPQ BX, $0x00 - JLE sequenceDecs_decode_56_bmi2_fill_check_overread - CMPQ DX, $0x07 - JLE sequenceDecs_decode_56_bmi2_fill_end - SHLQ $0x08, AX - SUBQ $0x01, R13 - SUBQ $0x01, BX - SUBQ $0x08, DX - MOVBQZX (R13), CX - ORQ CX, AX - JMP sequenceDecs_decode_56_bmi2_fill_byte_by_byte - -sequenceDecs_decode_56_bmi2_fill_check_overread: - CMPQ DX, $0x40 - JA error_overread - -sequenceDecs_decode_56_bmi2_fill_end: - // Update offset - MOVQ $0x00000808, CX - BEXTRQ CX, R8, R14 - MOVQ AX, R15 - LEAQ (DX)(R14*1), CX - ROLQ CL, R15 - BZHIQ R14, R15, R15 - MOVQ CX, DX - MOVQ R8, CX - SHRQ $0x20, CX - ADDQ R15, CX - MOVQ CX, 16(R9) - - // Update match length - MOVQ $0x00000808, CX - BEXTRQ CX, DI, R14 - MOVQ AX, R15 - LEAQ (DX)(R14*1), CX - ROLQ CL, R15 - BZHIQ R14, R15, R15 - MOVQ CX, DX - MOVQ DI, CX - SHRQ $0x20, CX - ADDQ R15, CX - MOVQ CX, 8(R9) - - // Update literal length - MOVQ $0x00000808, CX - BEXTRQ CX, SI, R14 - MOVQ AX, R15 - LEAQ (DX)(R14*1), CX - ROLQ CL, R15 - BZHIQ R14, R15, R15 - MOVQ CX, DX - MOVQ SI, CX - SHRQ $0x20, CX - ADDQ R15, CX - MOVQ CX, (R9) - - // Fill bitreader for state updates - MOVQ R13, (SP) - MOVQ $0x00000808, CX - BEXTRQ CX, R8, R13 - MOVQ ctx+16(FP), CX - CMPQ 96(CX), $0x00 - JZ sequenceDecs_decode_56_bmi2_skip_update - LEAQ (SI)(DI*1), R14 - ADDQ R8, R14 - MOVBQZX R14, R14 - LEAQ (DX)(R14*1), CX - MOVQ AX, R15 - MOVQ CX, DX - ROLQ CL, R15 - BZHIQ R14, R15, R15 - - // Update Offset State - BZHIQ R8, R15, CX - SHRXQ R8, R15, R15 - SHRL $0x10, R8 - ADDQ CX, R8 - - // Load ctx.ofTable - MOVQ ctx+16(FP), CX - MOVQ 48(CX), CX - MOVQ (CX)(R8*8), R8 - - // Update Match Length State - BZHIQ DI, R15, CX - SHRXQ DI, R15, R15 - SHRL $0x10, DI - ADDQ CX, DI - - // Load ctx.mlTable - MOVQ ctx+16(FP), CX - MOVQ 24(CX), CX - MOVQ (CX)(DI*8), DI - - // Update Literal Length State - BZHIQ SI, R15, CX - SHRL $0x10, SI - ADDQ CX, SI - - // Load ctx.llTable - MOVQ ctx+16(FP), CX - MOVQ (CX), CX - MOVQ (CX)(SI*8), SI - -sequenceDecs_decode_56_bmi2_skip_update: - // Adjust offset - MOVQ 16(R9), CX - CMPQ R13, $0x01 - JBE sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0 - MOVQ R11, R12 - MOVQ R10, R11 - MOVQ CX, R10 - JMP sequenceDecs_decode_56_bmi2_after_adjust - -sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0: - CMPQ (R9), $0x00000000 - JNE sequenceDecs_decode_56_bmi2_adjust_offset_maybezero - INCQ CX - JMP sequenceDecs_decode_56_bmi2_adjust_offset_nonzero - -sequenceDecs_decode_56_bmi2_adjust_offset_maybezero: - TESTQ CX, CX - JNZ sequenceDecs_decode_56_bmi2_adjust_offset_nonzero - MOVQ R10, CX - JMP sequenceDecs_decode_56_bmi2_after_adjust - -sequenceDecs_decode_56_bmi2_adjust_offset_nonzero: - CMPQ CX, $0x01 - JB sequenceDecs_decode_56_bmi2_adjust_zero - JEQ sequenceDecs_decode_56_bmi2_adjust_one - CMPQ CX, $0x02 - JA sequenceDecs_decode_56_bmi2_adjust_three - JMP sequenceDecs_decode_56_bmi2_adjust_two - -sequenceDecs_decode_56_bmi2_adjust_zero: - MOVQ R10, R13 - JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid - -sequenceDecs_decode_56_bmi2_adjust_one: - MOVQ R11, R13 - JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid - -sequenceDecs_decode_56_bmi2_adjust_two: - MOVQ R12, R13 - JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid - -sequenceDecs_decode_56_bmi2_adjust_three: - LEAQ -1(R10), R13 - -sequenceDecs_decode_56_bmi2_adjust_test_temp_valid: - TESTQ R13, R13 - JNZ sequenceDecs_decode_56_bmi2_adjust_temp_valid - MOVQ $0x00000001, R13 - -sequenceDecs_decode_56_bmi2_adjust_temp_valid: - CMPQ CX, $0x01 - CMOVQNE R11, R12 - MOVQ R10, R11 - MOVQ R13, R10 - MOVQ R13, CX - -sequenceDecs_decode_56_bmi2_after_adjust: - MOVQ CX, 16(R9) - - // Check values - MOVQ 8(R9), R13 - MOVQ (R9), R14 - LEAQ (R13)(R14*1), R15 - MOVQ s+0(FP), BP - ADDQ R15, 256(BP) - MOVQ ctx+16(FP), R15 - SUBQ R14, 128(R15) - JS error_not_enough_literals - CMPQ R13, $0x00020002 - JA sequenceDecs_decode_56_bmi2_error_match_len_too_big - TESTQ CX, CX - JNZ sequenceDecs_decode_56_bmi2_match_len_ofs_ok - TESTQ R13, R13 - JNZ sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch - -sequenceDecs_decode_56_bmi2_match_len_ofs_ok: - ADDQ $0x18, R9 - MOVQ ctx+16(FP), CX - DECQ 96(CX) - JNS sequenceDecs_decode_56_bmi2_main_loop - MOVQ s+0(FP), CX - MOVQ R10, 144(CX) - MOVQ R11, 152(CX) - MOVQ R12, 160(CX) - MOVQ br+8(FP), CX - MOVQ AX, 24(CX) - MOVB DL, 40(CX) - MOVQ BX, 32(CX) - - // Return success - MOVQ $0x00000000, ret+24(FP) - RET - - // Return with match length error -sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch: - MOVQ $0x00000001, ret+24(FP) - RET - - // Return with match too long error -sequenceDecs_decode_56_bmi2_error_match_len_too_big: - MOVQ $0x00000002, ret+24(FP) - RET - - // Return with match offset too long error - MOVQ $0x00000003, ret+24(FP) - RET - - // Return with not enough literals error -error_not_enough_literals: - MOVQ $0x00000004, ret+24(FP) - RET - - // Return with overread error -error_overread: - MOVQ $0x00000006, ret+24(FP) - RET - -// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool -// Requires: SSE -TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9 - MOVQ ctx+0(FP), R10 - MOVQ 8(R10), CX - TESTQ CX, CX - JZ empty_seqs - MOVQ (R10), AX - MOVQ 24(R10), DX - MOVQ 32(R10), BX - MOVQ 80(R10), SI - MOVQ 104(R10), DI - MOVQ 120(R10), R8 - MOVQ 56(R10), R9 - MOVQ 64(R10), R10 - ADDQ R10, R9 - - // seqsBase += 24 * seqIndex - LEAQ (DX)(DX*2), R11 - SHLQ $0x03, R11 - ADDQ R11, AX - - // outBase += outPosition - ADDQ DI, BX - -main_loop: - MOVQ (AX), R11 - MOVQ 16(AX), R12 - MOVQ 8(AX), R13 - - // Copy literals - TESTQ R11, R11 - JZ check_offset - XORQ R14, R14 - -copy_1: - MOVUPS (SI)(R14*1), X0 - MOVUPS X0, (BX)(R14*1) - ADDQ $0x10, R14 - CMPQ R14, R11 - JB copy_1 - ADDQ R11, SI - ADDQ R11, BX - ADDQ R11, DI - - // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) -check_offset: - LEAQ (DI)(R10*1), R11 - CMPQ R12, R11 - JG error_match_off_too_big - CMPQ R12, R8 - JG error_match_off_too_big - - // Copy match from history - MOVQ R12, R11 - SUBQ DI, R11 - JLS copy_match - MOVQ R9, R14 - SUBQ R11, R14 - CMPQ R13, R11 - JG copy_all_from_history - MOVQ R13, R11 - SUBQ $0x10, R11 - JB copy_4_small - -copy_4_loop: - MOVUPS (R14), X0 - MOVUPS X0, (BX) - ADDQ $0x10, R14 - ADDQ $0x10, BX - SUBQ $0x10, R11 - JAE copy_4_loop - LEAQ 16(R14)(R11*1), R14 - LEAQ 16(BX)(R11*1), BX - MOVUPS -16(R14), X0 - MOVUPS X0, -16(BX) - JMP copy_4_end - -copy_4_small: - CMPQ R13, $0x03 - JE copy_4_move_3 - CMPQ R13, $0x08 - JB copy_4_move_4through7 - JMP copy_4_move_8through16 - -copy_4_move_3: - MOVW (R14), R11 - MOVB 2(R14), R12 - MOVW R11, (BX) - MOVB R12, 2(BX) - ADDQ R13, R14 - ADDQ R13, BX - JMP copy_4_end - -copy_4_move_4through7: - MOVL (R14), R11 - MOVL -4(R14)(R13*1), R12 - MOVL R11, (BX) - MOVL R12, -4(BX)(R13*1) - ADDQ R13, R14 - ADDQ R13, BX - JMP copy_4_end - -copy_4_move_8through16: - MOVQ (R14), R11 - MOVQ -8(R14)(R13*1), R12 - MOVQ R11, (BX) - MOVQ R12, -8(BX)(R13*1) - ADDQ R13, R14 - ADDQ R13, BX - -copy_4_end: - ADDQ R13, DI - ADDQ $0x18, AX - INCQ DX - CMPQ DX, CX - JB main_loop - JMP loop_finished - -copy_all_from_history: - MOVQ R11, R15 - SUBQ $0x10, R15 - JB copy_5_small - -copy_5_loop: - MOVUPS (R14), X0 - MOVUPS X0, (BX) - ADDQ $0x10, R14 - ADDQ $0x10, BX - SUBQ $0x10, R15 - JAE copy_5_loop - LEAQ 16(R14)(R15*1), R14 - LEAQ 16(BX)(R15*1), BX - MOVUPS -16(R14), X0 - MOVUPS X0, -16(BX) - JMP copy_5_end - -copy_5_small: - CMPQ R11, $0x03 - JE copy_5_move_3 - JB copy_5_move_1or2 - CMPQ R11, $0x08 - JB copy_5_move_4through7 - JMP copy_5_move_8through16 - -copy_5_move_1or2: - MOVB (R14), R15 - MOVB -1(R14)(R11*1), BP - MOVB R15, (BX) - MOVB BP, -1(BX)(R11*1) - ADDQ R11, R14 - ADDQ R11, BX - JMP copy_5_end - -copy_5_move_3: - MOVW (R14), R15 - MOVB 2(R14), BP - MOVW R15, (BX) - MOVB BP, 2(BX) - ADDQ R11, R14 - ADDQ R11, BX - JMP copy_5_end - -copy_5_move_4through7: - MOVL (R14), R15 - MOVL -4(R14)(R11*1), BP - MOVL R15, (BX) - MOVL BP, -4(BX)(R11*1) - ADDQ R11, R14 - ADDQ R11, BX - JMP copy_5_end - -copy_5_move_8through16: - MOVQ (R14), R15 - MOVQ -8(R14)(R11*1), BP - MOVQ R15, (BX) - MOVQ BP, -8(BX)(R11*1) - ADDQ R11, R14 - ADDQ R11, BX - -copy_5_end: - ADDQ R11, DI - SUBQ R11, R13 - - // Copy match from the current buffer -copy_match: - MOVQ BX, R11 - SUBQ R12, R11 - - // ml <= mo - CMPQ R13, R12 - JA copy_overlapping_match - - // Copy non-overlapping match - ADDQ R13, DI - MOVQ BX, R12 - ADDQ R13, BX - -copy_2: - MOVUPS (R11), X0 - MOVUPS X0, (R12) - ADDQ $0x10, R11 - ADDQ $0x10, R12 - SUBQ $0x10, R13 - JHI copy_2 - JMP handle_loop - - // Copy overlapping match -copy_overlapping_match: - ADDQ R13, DI - -copy_slow_3: - MOVB (R11), R12 - MOVB R12, (BX) - INCQ R11 - INCQ BX - DECQ R13 - JNZ copy_slow_3 - -handle_loop: - ADDQ $0x18, AX - INCQ DX - CMPQ DX, CX - JB main_loop - -loop_finished: - // Return value - MOVB $0x01, ret+8(FP) - - // Update the context - MOVQ ctx+0(FP), AX - MOVQ DX, 24(AX) - MOVQ DI, 104(AX) - SUBQ 80(AX), SI - MOVQ SI, 112(AX) - RET - -error_match_off_too_big: - // Return value - MOVB $0x00, ret+8(FP) - - // Update the context - MOVQ ctx+0(FP), AX - MOVQ DX, 24(AX) - MOVQ DI, 104(AX) - SUBQ 80(AX), SI - MOVQ SI, 112(AX) - RET - -empty_seqs: - // Return value - MOVB $0x01, ret+8(FP) - RET - -// func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool -// Requires: SSE -TEXT ·sequenceDecs_executeSimple_safe_amd64(SB), $8-9 - MOVQ ctx+0(FP), R10 - MOVQ 8(R10), CX - TESTQ CX, CX - JZ empty_seqs - MOVQ (R10), AX - MOVQ 24(R10), DX - MOVQ 32(R10), BX - MOVQ 80(R10), SI - MOVQ 104(R10), DI - MOVQ 120(R10), R8 - MOVQ 56(R10), R9 - MOVQ 64(R10), R10 - ADDQ R10, R9 - - // seqsBase += 24 * seqIndex - LEAQ (DX)(DX*2), R11 - SHLQ $0x03, R11 - ADDQ R11, AX - - // outBase += outPosition - ADDQ DI, BX - -main_loop: - MOVQ (AX), R11 - MOVQ 16(AX), R12 - MOVQ 8(AX), R13 - - // Copy literals - TESTQ R11, R11 - JZ check_offset - MOVQ R11, R14 - SUBQ $0x10, R14 - JB copy_1_small - -copy_1_loop: - MOVUPS (SI), X0 - MOVUPS X0, (BX) - ADDQ $0x10, SI - ADDQ $0x10, BX - SUBQ $0x10, R14 - JAE copy_1_loop - LEAQ 16(SI)(R14*1), SI - LEAQ 16(BX)(R14*1), BX - MOVUPS -16(SI), X0 - MOVUPS X0, -16(BX) - JMP copy_1_end - -copy_1_small: - CMPQ R11, $0x03 - JE copy_1_move_3 - JB copy_1_move_1or2 - CMPQ R11, $0x08 - JB copy_1_move_4through7 - JMP copy_1_move_8through16 - -copy_1_move_1or2: - MOVB (SI), R14 - MOVB -1(SI)(R11*1), R15 - MOVB R14, (BX) - MOVB R15, -1(BX)(R11*1) - ADDQ R11, SI - ADDQ R11, BX - JMP copy_1_end - -copy_1_move_3: - MOVW (SI), R14 - MOVB 2(SI), R15 - MOVW R14, (BX) - MOVB R15, 2(BX) - ADDQ R11, SI - ADDQ R11, BX - JMP copy_1_end - -copy_1_move_4through7: - MOVL (SI), R14 - MOVL -4(SI)(R11*1), R15 - MOVL R14, (BX) - MOVL R15, -4(BX)(R11*1) - ADDQ R11, SI - ADDQ R11, BX - JMP copy_1_end - -copy_1_move_8through16: - MOVQ (SI), R14 - MOVQ -8(SI)(R11*1), R15 - MOVQ R14, (BX) - MOVQ R15, -8(BX)(R11*1) - ADDQ R11, SI - ADDQ R11, BX - -copy_1_end: - ADDQ R11, DI - - // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) -check_offset: - LEAQ (DI)(R10*1), R11 - CMPQ R12, R11 - JG error_match_off_too_big - CMPQ R12, R8 - JG error_match_off_too_big - - // Copy match from history - MOVQ R12, R11 - SUBQ DI, R11 - JLS copy_match - MOVQ R9, R14 - SUBQ R11, R14 - CMPQ R13, R11 - JG copy_all_from_history - MOVQ R13, R11 - SUBQ $0x10, R11 - JB copy_4_small - -copy_4_loop: - MOVUPS (R14), X0 - MOVUPS X0, (BX) - ADDQ $0x10, R14 - ADDQ $0x10, BX - SUBQ $0x10, R11 - JAE copy_4_loop - LEAQ 16(R14)(R11*1), R14 - LEAQ 16(BX)(R11*1), BX - MOVUPS -16(R14), X0 - MOVUPS X0, -16(BX) - JMP copy_4_end - -copy_4_small: - CMPQ R13, $0x03 - JE copy_4_move_3 - CMPQ R13, $0x08 - JB copy_4_move_4through7 - JMP copy_4_move_8through16 - -copy_4_move_3: - MOVW (R14), R11 - MOVB 2(R14), R12 - MOVW R11, (BX) - MOVB R12, 2(BX) - ADDQ R13, R14 - ADDQ R13, BX - JMP copy_4_end - -copy_4_move_4through7: - MOVL (R14), R11 - MOVL -4(R14)(R13*1), R12 - MOVL R11, (BX) - MOVL R12, -4(BX)(R13*1) - ADDQ R13, R14 - ADDQ R13, BX - JMP copy_4_end - -copy_4_move_8through16: - MOVQ (R14), R11 - MOVQ -8(R14)(R13*1), R12 - MOVQ R11, (BX) - MOVQ R12, -8(BX)(R13*1) - ADDQ R13, R14 - ADDQ R13, BX - -copy_4_end: - ADDQ R13, DI - ADDQ $0x18, AX - INCQ DX - CMPQ DX, CX - JB main_loop - JMP loop_finished - -copy_all_from_history: - MOVQ R11, R15 - SUBQ $0x10, R15 - JB copy_5_small - -copy_5_loop: - MOVUPS (R14), X0 - MOVUPS X0, (BX) - ADDQ $0x10, R14 - ADDQ $0x10, BX - SUBQ $0x10, R15 - JAE copy_5_loop - LEAQ 16(R14)(R15*1), R14 - LEAQ 16(BX)(R15*1), BX - MOVUPS -16(R14), X0 - MOVUPS X0, -16(BX) - JMP copy_5_end - -copy_5_small: - CMPQ R11, $0x03 - JE copy_5_move_3 - JB copy_5_move_1or2 - CMPQ R11, $0x08 - JB copy_5_move_4through7 - JMP copy_5_move_8through16 - -copy_5_move_1or2: - MOVB (R14), R15 - MOVB -1(R14)(R11*1), BP - MOVB R15, (BX) - MOVB BP, -1(BX)(R11*1) - ADDQ R11, R14 - ADDQ R11, BX - JMP copy_5_end - -copy_5_move_3: - MOVW (R14), R15 - MOVB 2(R14), BP - MOVW R15, (BX) - MOVB BP, 2(BX) - ADDQ R11, R14 - ADDQ R11, BX - JMP copy_5_end - -copy_5_move_4through7: - MOVL (R14), R15 - MOVL -4(R14)(R11*1), BP - MOVL R15, (BX) - MOVL BP, -4(BX)(R11*1) - ADDQ R11, R14 - ADDQ R11, BX - JMP copy_5_end - -copy_5_move_8through16: - MOVQ (R14), R15 - MOVQ -8(R14)(R11*1), BP - MOVQ R15, (BX) - MOVQ BP, -8(BX)(R11*1) - ADDQ R11, R14 - ADDQ R11, BX - -copy_5_end: - ADDQ R11, DI - SUBQ R11, R13 - - // Copy match from the current buffer -copy_match: - MOVQ BX, R11 - SUBQ R12, R11 - - // ml <= mo - CMPQ R13, R12 - JA copy_overlapping_match - - // Copy non-overlapping match - ADDQ R13, DI - MOVQ R13, R12 - SUBQ $0x10, R12 - JB copy_2_small - -copy_2_loop: - MOVUPS (R11), X0 - MOVUPS X0, (BX) - ADDQ $0x10, R11 - ADDQ $0x10, BX - SUBQ $0x10, R12 - JAE copy_2_loop - LEAQ 16(R11)(R12*1), R11 - LEAQ 16(BX)(R12*1), BX - MOVUPS -16(R11), X0 - MOVUPS X0, -16(BX) - JMP copy_2_end - -copy_2_small: - CMPQ R13, $0x03 - JE copy_2_move_3 - JB copy_2_move_1or2 - CMPQ R13, $0x08 - JB copy_2_move_4through7 - JMP copy_2_move_8through16 - -copy_2_move_1or2: - MOVB (R11), R12 - MOVB -1(R11)(R13*1), R14 - MOVB R12, (BX) - MOVB R14, -1(BX)(R13*1) - ADDQ R13, R11 - ADDQ R13, BX - JMP copy_2_end - -copy_2_move_3: - MOVW (R11), R12 - MOVB 2(R11), R14 - MOVW R12, (BX) - MOVB R14, 2(BX) - ADDQ R13, R11 - ADDQ R13, BX - JMP copy_2_end - -copy_2_move_4through7: - MOVL (R11), R12 - MOVL -4(R11)(R13*1), R14 - MOVL R12, (BX) - MOVL R14, -4(BX)(R13*1) - ADDQ R13, R11 - ADDQ R13, BX - JMP copy_2_end - -copy_2_move_8through16: - MOVQ (R11), R12 - MOVQ -8(R11)(R13*1), R14 - MOVQ R12, (BX) - MOVQ R14, -8(BX)(R13*1) - ADDQ R13, R11 - ADDQ R13, BX - -copy_2_end: - JMP handle_loop - - // Copy overlapping match -copy_overlapping_match: - ADDQ R13, DI - -copy_slow_3: - MOVB (R11), R12 - MOVB R12, (BX) - INCQ R11 - INCQ BX - DECQ R13 - JNZ copy_slow_3 - -handle_loop: - ADDQ $0x18, AX - INCQ DX - CMPQ DX, CX - JB main_loop - -loop_finished: - // Return value - MOVB $0x01, ret+8(FP) - - // Update the context - MOVQ ctx+0(FP), AX - MOVQ DX, 24(AX) - MOVQ DI, 104(AX) - SUBQ 80(AX), SI - MOVQ SI, 112(AX) - RET - -error_match_off_too_big: - // Return value - MOVB $0x00, ret+8(FP) - - // Update the context - MOVQ ctx+0(FP), AX - MOVQ DX, 24(AX) - MOVQ DI, 104(AX) - SUBQ 80(AX), SI - MOVQ SI, 112(AX) - RET - -empty_seqs: - // Return value - MOVB $0x01, ret+8(FP) - RET - -// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int -// Requires: CMOV, SSE -TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 - MOVQ br+8(FP), CX - MOVQ 24(CX), DX - MOVBQZX 40(CX), BX - MOVQ (CX), AX - MOVQ 32(CX), SI - ADDQ SI, AX - MOVQ AX, (SP) - MOVQ ctx+16(FP), AX - MOVQ 72(AX), DI - MOVQ 80(AX), R8 - MOVQ 88(AX), R9 - XORQ CX, CX - MOVQ CX, 8(SP) - MOVQ CX, 16(SP) - MOVQ CX, 24(SP) - MOVQ 112(AX), R10 - MOVQ 128(AX), CX - MOVQ CX, 32(SP) - MOVQ 144(AX), R11 - MOVQ 136(AX), R12 - MOVQ 200(AX), CX - MOVQ CX, 56(SP) - MOVQ 176(AX), CX - MOVQ CX, 48(SP) - MOVQ 184(AX), AX - MOVQ AX, 40(SP) - MOVQ 40(SP), AX - ADDQ AX, 48(SP) - - // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) - ADDQ R10, 32(SP) - - // outBase += outPosition - ADDQ R12, R10 - -sequenceDecs_decodeSync_amd64_main_loop: - MOVQ (SP), R13 - - // Fill bitreader to have enough for the offset and match length. - CMPQ SI, $0x08 - JL sequenceDecs_decodeSync_amd64_fill_byte_by_byte - MOVQ BX, AX - SHRQ $0x03, AX - SUBQ AX, R13 - MOVQ (R13), DX - SUBQ AX, SI - ANDQ $0x07, BX - JMP sequenceDecs_decodeSync_amd64_fill_end - -sequenceDecs_decodeSync_amd64_fill_byte_by_byte: - CMPQ SI, $0x00 - JLE sequenceDecs_decodeSync_amd64_fill_check_overread - CMPQ BX, $0x07 - JLE sequenceDecs_decodeSync_amd64_fill_end - SHLQ $0x08, DX - SUBQ $0x01, R13 - SUBQ $0x01, SI - SUBQ $0x08, BX - MOVBQZX (R13), AX - ORQ AX, DX - JMP sequenceDecs_decodeSync_amd64_fill_byte_by_byte - -sequenceDecs_decodeSync_amd64_fill_check_overread: - CMPQ BX, $0x40 - JA error_overread - -sequenceDecs_decodeSync_amd64_fill_end: - // Update offset - MOVQ R9, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - SHRQ $0x20, AX - TESTQ CX, CX - JZ sequenceDecs_decodeSync_amd64_of_update_zero - ADDQ CX, BX - CMPQ BX, $0x40 - JA sequenceDecs_decodeSync_amd64_of_update_zero - CMPQ CX, $0x40 - JAE sequenceDecs_decodeSync_amd64_of_update_zero - NEGQ CX - SHRQ CL, R14 - ADDQ R14, AX - -sequenceDecs_decodeSync_amd64_of_update_zero: - MOVQ AX, 8(SP) - - // Update match length - MOVQ R8, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - SHRQ $0x20, AX - TESTQ CX, CX - JZ sequenceDecs_decodeSync_amd64_ml_update_zero - ADDQ CX, BX - CMPQ BX, $0x40 - JA sequenceDecs_decodeSync_amd64_ml_update_zero - CMPQ CX, $0x40 - JAE sequenceDecs_decodeSync_amd64_ml_update_zero - NEGQ CX - SHRQ CL, R14 - ADDQ R14, AX - -sequenceDecs_decodeSync_amd64_ml_update_zero: - MOVQ AX, 16(SP) - - // Fill bitreader to have enough for the remaining - CMPQ SI, $0x08 - JL sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte - MOVQ BX, AX - SHRQ $0x03, AX - SUBQ AX, R13 - MOVQ (R13), DX - SUBQ AX, SI - ANDQ $0x07, BX - JMP sequenceDecs_decodeSync_amd64_fill_2_end - -sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte: - CMPQ SI, $0x00 - JLE sequenceDecs_decodeSync_amd64_fill_2_check_overread - CMPQ BX, $0x07 - JLE sequenceDecs_decodeSync_amd64_fill_2_end - SHLQ $0x08, DX - SUBQ $0x01, R13 - SUBQ $0x01, SI - SUBQ $0x08, BX - MOVBQZX (R13), AX - ORQ AX, DX - JMP sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte - -sequenceDecs_decodeSync_amd64_fill_2_check_overread: - CMPQ BX, $0x40 - JA error_overread - -sequenceDecs_decodeSync_amd64_fill_2_end: - // Update literal length - MOVQ DI, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - SHRQ $0x20, AX - TESTQ CX, CX - JZ sequenceDecs_decodeSync_amd64_ll_update_zero - ADDQ CX, BX - CMPQ BX, $0x40 - JA sequenceDecs_decodeSync_amd64_ll_update_zero - CMPQ CX, $0x40 - JAE sequenceDecs_decodeSync_amd64_ll_update_zero - NEGQ CX - SHRQ CL, R14 - ADDQ R14, AX - -sequenceDecs_decodeSync_amd64_ll_update_zero: - MOVQ AX, 24(SP) - - // Fill bitreader for state updates - MOVQ R13, (SP) - MOVQ R9, AX - SHRQ $0x08, AX - MOVBQZX AL, AX - MOVQ ctx+16(FP), CX - CMPQ 96(CX), $0x00 - JZ sequenceDecs_decodeSync_amd64_skip_update - - // Update Literal Length State - MOVBQZX DI, R13 - SHRL $0x10, DI - LEAQ (BX)(R13*1), CX - MOVQ DX, R14 - MOVQ CX, BX - ROLQ CL, R14 - MOVL $0x00000001, R15 - MOVB R13, CL - SHLL CL, R15 - DECL R15 - ANDQ R15, R14 - ADDQ R14, DI - - // Load ctx.llTable - MOVQ ctx+16(FP), CX - MOVQ (CX), CX - MOVQ (CX)(DI*8), DI - - // Update Match Length State - MOVBQZX R8, R13 - SHRL $0x10, R8 - LEAQ (BX)(R13*1), CX - MOVQ DX, R14 - MOVQ CX, BX - ROLQ CL, R14 - MOVL $0x00000001, R15 - MOVB R13, CL - SHLL CL, R15 - DECL R15 - ANDQ R15, R14 - ADDQ R14, R8 - - // Load ctx.mlTable - MOVQ ctx+16(FP), CX - MOVQ 24(CX), CX - MOVQ (CX)(R8*8), R8 - - // Update Offset State - MOVBQZX R9, R13 - SHRL $0x10, R9 - LEAQ (BX)(R13*1), CX - MOVQ DX, R14 - MOVQ CX, BX - ROLQ CL, R14 - MOVL $0x00000001, R15 - MOVB R13, CL - SHLL CL, R15 - DECL R15 - ANDQ R15, R14 - ADDQ R14, R9 - - // Load ctx.ofTable - MOVQ ctx+16(FP), CX - MOVQ 48(CX), CX - MOVQ (CX)(R9*8), R9 - -sequenceDecs_decodeSync_amd64_skip_update: - // Adjust offset - MOVQ s+0(FP), CX - MOVQ 8(SP), R13 - CMPQ AX, $0x01 - JBE sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0 - MOVUPS 144(CX), X0 - MOVQ R13, 144(CX) - MOVUPS X0, 152(CX) - JMP sequenceDecs_decodeSync_amd64_after_adjust - -sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0: - CMPQ 24(SP), $0x00000000 - JNE sequenceDecs_decodeSync_amd64_adjust_offset_maybezero - INCQ R13 - JMP sequenceDecs_decodeSync_amd64_adjust_offset_nonzero - -sequenceDecs_decodeSync_amd64_adjust_offset_maybezero: - TESTQ R13, R13 - JNZ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero - MOVQ 144(CX), R13 - JMP sequenceDecs_decodeSync_amd64_after_adjust - -sequenceDecs_decodeSync_amd64_adjust_offset_nonzero: - MOVQ R13, AX - XORQ R14, R14 - MOVQ $-1, R15 - CMPQ R13, $0x03 - CMOVQEQ R14, AX - CMOVQEQ R15, R14 - ADDQ 144(CX)(AX*8), R14 - JNZ sequenceDecs_decodeSync_amd64_adjust_temp_valid - MOVQ $0x00000001, R14 - -sequenceDecs_decodeSync_amd64_adjust_temp_valid: - CMPQ R13, $0x01 - JZ sequenceDecs_decodeSync_amd64_adjust_skip - MOVQ 152(CX), AX - MOVQ AX, 160(CX) - -sequenceDecs_decodeSync_amd64_adjust_skip: - MOVQ 144(CX), AX - MOVQ AX, 152(CX) - MOVQ R14, 144(CX) - MOVQ R14, R13 - -sequenceDecs_decodeSync_amd64_after_adjust: - MOVQ R13, 8(SP) - - // Check values - MOVQ 16(SP), AX - MOVQ 24(SP), CX - LEAQ (AX)(CX*1), R14 - MOVQ s+0(FP), R15 - ADDQ R14, 256(R15) - MOVQ ctx+16(FP), R14 - SUBQ CX, 104(R14) - JS error_not_enough_literals - CMPQ AX, $0x00020002 - JA sequenceDecs_decodeSync_amd64_error_match_len_too_big - TESTQ R13, R13 - JNZ sequenceDecs_decodeSync_amd64_match_len_ofs_ok - TESTQ AX, AX - JNZ sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch - -sequenceDecs_decodeSync_amd64_match_len_ofs_ok: - MOVQ 24(SP), AX - MOVQ 8(SP), CX - MOVQ 16(SP), R13 - - // Check if we have enough space in s.out - LEAQ (AX)(R13*1), R14 - ADDQ R10, R14 - CMPQ R14, 32(SP) - JA error_not_enough_space - - // Copy literals - TESTQ AX, AX - JZ check_offset - XORQ R14, R14 - -copy_1: - MOVUPS (R11)(R14*1), X0 - MOVUPS X0, (R10)(R14*1) - ADDQ $0x10, R14 - CMPQ R14, AX - JB copy_1 - ADDQ AX, R11 - ADDQ AX, R10 - ADDQ AX, R12 - - // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) -check_offset: - MOVQ R12, AX - ADDQ 40(SP), AX - CMPQ CX, AX - JG error_match_off_too_big - CMPQ CX, 56(SP) - JG error_match_off_too_big - - // Copy match from history - MOVQ CX, AX - SUBQ R12, AX - JLS copy_match - MOVQ 48(SP), R14 - SUBQ AX, R14 - CMPQ R13, AX - JG copy_all_from_history - MOVQ R13, AX - SUBQ $0x10, AX - JB copy_4_small - -copy_4_loop: - MOVUPS (R14), X0 - MOVUPS X0, (R10) - ADDQ $0x10, R14 - ADDQ $0x10, R10 - SUBQ $0x10, AX - JAE copy_4_loop - LEAQ 16(R14)(AX*1), R14 - LEAQ 16(R10)(AX*1), R10 - MOVUPS -16(R14), X0 - MOVUPS X0, -16(R10) - JMP copy_4_end - -copy_4_small: - CMPQ R13, $0x03 - JE copy_4_move_3 - CMPQ R13, $0x08 - JB copy_4_move_4through7 - JMP copy_4_move_8through16 - -copy_4_move_3: - MOVW (R14), AX - MOVB 2(R14), CL - MOVW AX, (R10) - MOVB CL, 2(R10) - ADDQ R13, R14 - ADDQ R13, R10 - JMP copy_4_end - -copy_4_move_4through7: - MOVL (R14), AX - MOVL -4(R14)(R13*1), CX - MOVL AX, (R10) - MOVL CX, -4(R10)(R13*1) - ADDQ R13, R14 - ADDQ R13, R10 - JMP copy_4_end - -copy_4_move_8through16: - MOVQ (R14), AX - MOVQ -8(R14)(R13*1), CX - MOVQ AX, (R10) - MOVQ CX, -8(R10)(R13*1) - ADDQ R13, R14 - ADDQ R13, R10 - -copy_4_end: - ADDQ R13, R12 - JMP handle_loop - JMP loop_finished - -copy_all_from_history: - MOVQ AX, R15 - SUBQ $0x10, R15 - JB copy_5_small - -copy_5_loop: - MOVUPS (R14), X0 - MOVUPS X0, (R10) - ADDQ $0x10, R14 - ADDQ $0x10, R10 - SUBQ $0x10, R15 - JAE copy_5_loop - LEAQ 16(R14)(R15*1), R14 - LEAQ 16(R10)(R15*1), R10 - MOVUPS -16(R14), X0 - MOVUPS X0, -16(R10) - JMP copy_5_end - -copy_5_small: - CMPQ AX, $0x03 - JE copy_5_move_3 - JB copy_5_move_1or2 - CMPQ AX, $0x08 - JB copy_5_move_4through7 - JMP copy_5_move_8through16 - -copy_5_move_1or2: - MOVB (R14), R15 - MOVB -1(R14)(AX*1), BP - MOVB R15, (R10) - MOVB BP, -1(R10)(AX*1) - ADDQ AX, R14 - ADDQ AX, R10 - JMP copy_5_end - -copy_5_move_3: - MOVW (R14), R15 - MOVB 2(R14), BP - MOVW R15, (R10) - MOVB BP, 2(R10) - ADDQ AX, R14 - ADDQ AX, R10 - JMP copy_5_end - -copy_5_move_4through7: - MOVL (R14), R15 - MOVL -4(R14)(AX*1), BP - MOVL R15, (R10) - MOVL BP, -4(R10)(AX*1) - ADDQ AX, R14 - ADDQ AX, R10 - JMP copy_5_end - -copy_5_move_8through16: - MOVQ (R14), R15 - MOVQ -8(R14)(AX*1), BP - MOVQ R15, (R10) - MOVQ BP, -8(R10)(AX*1) - ADDQ AX, R14 - ADDQ AX, R10 - -copy_5_end: - ADDQ AX, R12 - SUBQ AX, R13 - - // Copy match from the current buffer -copy_match: - MOVQ R10, AX - SUBQ CX, AX - - // ml <= mo - CMPQ R13, CX - JA copy_overlapping_match - - // Copy non-overlapping match - ADDQ R13, R12 - MOVQ R10, CX - ADDQ R13, R10 - -copy_2: - MOVUPS (AX), X0 - MOVUPS X0, (CX) - ADDQ $0x10, AX - ADDQ $0x10, CX - SUBQ $0x10, R13 - JHI copy_2 - JMP handle_loop - - // Copy overlapping match -copy_overlapping_match: - ADDQ R13, R12 - -copy_slow_3: - MOVB (AX), CL - MOVB CL, (R10) - INCQ AX - INCQ R10 - DECQ R13 - JNZ copy_slow_3 - -handle_loop: - MOVQ ctx+16(FP), AX - DECQ 96(AX) - JNS sequenceDecs_decodeSync_amd64_main_loop - -loop_finished: - MOVQ br+8(FP), AX - MOVQ DX, 24(AX) - MOVB BL, 40(AX) - MOVQ SI, 32(AX) - - // Update the context - MOVQ ctx+16(FP), AX - MOVQ R12, 136(AX) - MOVQ 144(AX), CX - SUBQ CX, R11 - MOVQ R11, 168(AX) - - // Return success - MOVQ $0x00000000, ret+24(FP) - RET - - // Return with match length error -sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch: - MOVQ 16(SP), AX - MOVQ ctx+16(FP), CX - MOVQ AX, 216(CX) - MOVQ $0x00000001, ret+24(FP) - RET - - // Return with match too long error -sequenceDecs_decodeSync_amd64_error_match_len_too_big: - MOVQ ctx+16(FP), AX - MOVQ 16(SP), CX - MOVQ CX, 216(AX) - MOVQ $0x00000002, ret+24(FP) - RET - - // Return with match offset too long error -error_match_off_too_big: - MOVQ ctx+16(FP), AX - MOVQ 8(SP), CX - MOVQ CX, 224(AX) - MOVQ R12, 136(AX) - MOVQ $0x00000003, ret+24(FP) - RET - - // Return with not enough literals error -error_not_enough_literals: - MOVQ ctx+16(FP), AX - MOVQ 24(SP), CX - MOVQ CX, 208(AX) - MOVQ $0x00000004, ret+24(FP) - RET - - // Return with overread error -error_overread: - MOVQ $0x00000006, ret+24(FP) - RET - - // Return with not enough output space error -error_not_enough_space: - MOVQ ctx+16(FP), AX - MOVQ 24(SP), CX - MOVQ CX, 208(AX) - MOVQ 16(SP), CX - MOVQ CX, 216(AX) - MOVQ R12, 136(AX) - MOVQ $0x00000005, ret+24(FP) - RET - -// func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int -// Requires: BMI, BMI2, CMOV, SSE -TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 - MOVQ br+8(FP), BX - MOVQ 24(BX), AX - MOVBQZX 40(BX), DX - MOVQ (BX), CX - MOVQ 32(BX), BX - ADDQ BX, CX - MOVQ CX, (SP) - MOVQ ctx+16(FP), CX - MOVQ 72(CX), SI - MOVQ 80(CX), DI - MOVQ 88(CX), R8 - XORQ R9, R9 - MOVQ R9, 8(SP) - MOVQ R9, 16(SP) - MOVQ R9, 24(SP) - MOVQ 112(CX), R9 - MOVQ 128(CX), R10 - MOVQ R10, 32(SP) - MOVQ 144(CX), R10 - MOVQ 136(CX), R11 - MOVQ 200(CX), R12 - MOVQ R12, 56(SP) - MOVQ 176(CX), R12 - MOVQ R12, 48(SP) - MOVQ 184(CX), CX - MOVQ CX, 40(SP) - MOVQ 40(SP), CX - ADDQ CX, 48(SP) - - // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) - ADDQ R9, 32(SP) - - // outBase += outPosition - ADDQ R11, R9 - -sequenceDecs_decodeSync_bmi2_main_loop: - MOVQ (SP), R12 - - // Fill bitreader to have enough for the offset and match length. - CMPQ BX, $0x08 - JL sequenceDecs_decodeSync_bmi2_fill_byte_by_byte - MOVQ DX, CX - SHRQ $0x03, CX - SUBQ CX, R12 - MOVQ (R12), AX - SUBQ CX, BX - ANDQ $0x07, DX - JMP sequenceDecs_decodeSync_bmi2_fill_end - -sequenceDecs_decodeSync_bmi2_fill_byte_by_byte: - CMPQ BX, $0x00 - JLE sequenceDecs_decodeSync_bmi2_fill_check_overread - CMPQ DX, $0x07 - JLE sequenceDecs_decodeSync_bmi2_fill_end - SHLQ $0x08, AX - SUBQ $0x01, R12 - SUBQ $0x01, BX - SUBQ $0x08, DX - MOVBQZX (R12), CX - ORQ CX, AX - JMP sequenceDecs_decodeSync_bmi2_fill_byte_by_byte - -sequenceDecs_decodeSync_bmi2_fill_check_overread: - CMPQ DX, $0x40 - JA error_overread - -sequenceDecs_decodeSync_bmi2_fill_end: - // Update offset - MOVQ $0x00000808, CX - BEXTRQ CX, R8, R13 - MOVQ AX, R14 - LEAQ (DX)(R13*1), CX - ROLQ CL, R14 - BZHIQ R13, R14, R14 - MOVQ CX, DX - MOVQ R8, CX - SHRQ $0x20, CX - ADDQ R14, CX - MOVQ CX, 8(SP) - - // Update match length - MOVQ $0x00000808, CX - BEXTRQ CX, DI, R13 - MOVQ AX, R14 - LEAQ (DX)(R13*1), CX - ROLQ CL, R14 - BZHIQ R13, R14, R14 - MOVQ CX, DX - MOVQ DI, CX - SHRQ $0x20, CX - ADDQ R14, CX - MOVQ CX, 16(SP) - - // Fill bitreader to have enough for the remaining - CMPQ BX, $0x08 - JL sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte - MOVQ DX, CX - SHRQ $0x03, CX - SUBQ CX, R12 - MOVQ (R12), AX - SUBQ CX, BX - ANDQ $0x07, DX - JMP sequenceDecs_decodeSync_bmi2_fill_2_end - -sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte: - CMPQ BX, $0x00 - JLE sequenceDecs_decodeSync_bmi2_fill_2_check_overread - CMPQ DX, $0x07 - JLE sequenceDecs_decodeSync_bmi2_fill_2_end - SHLQ $0x08, AX - SUBQ $0x01, R12 - SUBQ $0x01, BX - SUBQ $0x08, DX - MOVBQZX (R12), CX - ORQ CX, AX - JMP sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte - -sequenceDecs_decodeSync_bmi2_fill_2_check_overread: - CMPQ DX, $0x40 - JA error_overread - -sequenceDecs_decodeSync_bmi2_fill_2_end: - // Update literal length - MOVQ $0x00000808, CX - BEXTRQ CX, SI, R13 - MOVQ AX, R14 - LEAQ (DX)(R13*1), CX - ROLQ CL, R14 - BZHIQ R13, R14, R14 - MOVQ CX, DX - MOVQ SI, CX - SHRQ $0x20, CX - ADDQ R14, CX - MOVQ CX, 24(SP) - - // Fill bitreader for state updates - MOVQ R12, (SP) - MOVQ $0x00000808, CX - BEXTRQ CX, R8, R12 - MOVQ ctx+16(FP), CX - CMPQ 96(CX), $0x00 - JZ sequenceDecs_decodeSync_bmi2_skip_update - LEAQ (SI)(DI*1), R13 - ADDQ R8, R13 - MOVBQZX R13, R13 - LEAQ (DX)(R13*1), CX - MOVQ AX, R14 - MOVQ CX, DX - ROLQ CL, R14 - BZHIQ R13, R14, R14 - - // Update Offset State - BZHIQ R8, R14, CX - SHRXQ R8, R14, R14 - SHRL $0x10, R8 - ADDQ CX, R8 - - // Load ctx.ofTable - MOVQ ctx+16(FP), CX - MOVQ 48(CX), CX - MOVQ (CX)(R8*8), R8 - - // Update Match Length State - BZHIQ DI, R14, CX - SHRXQ DI, R14, R14 - SHRL $0x10, DI - ADDQ CX, DI - - // Load ctx.mlTable - MOVQ ctx+16(FP), CX - MOVQ 24(CX), CX - MOVQ (CX)(DI*8), DI - - // Update Literal Length State - BZHIQ SI, R14, CX - SHRL $0x10, SI - ADDQ CX, SI - - // Load ctx.llTable - MOVQ ctx+16(FP), CX - MOVQ (CX), CX - MOVQ (CX)(SI*8), SI - -sequenceDecs_decodeSync_bmi2_skip_update: - // Adjust offset - MOVQ s+0(FP), CX - MOVQ 8(SP), R13 - CMPQ R12, $0x01 - JBE sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0 - MOVUPS 144(CX), X0 - MOVQ R13, 144(CX) - MOVUPS X0, 152(CX) - JMP sequenceDecs_decodeSync_bmi2_after_adjust - -sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0: - CMPQ 24(SP), $0x00000000 - JNE sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero - INCQ R13 - JMP sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero - -sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero: - TESTQ R13, R13 - JNZ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero - MOVQ 144(CX), R13 - JMP sequenceDecs_decodeSync_bmi2_after_adjust - -sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero: - MOVQ R13, R12 - XORQ R14, R14 - MOVQ $-1, R15 - CMPQ R13, $0x03 - CMOVQEQ R14, R12 - CMOVQEQ R15, R14 - ADDQ 144(CX)(R12*8), R14 - JNZ sequenceDecs_decodeSync_bmi2_adjust_temp_valid - MOVQ $0x00000001, R14 - -sequenceDecs_decodeSync_bmi2_adjust_temp_valid: - CMPQ R13, $0x01 - JZ sequenceDecs_decodeSync_bmi2_adjust_skip - MOVQ 152(CX), R12 - MOVQ R12, 160(CX) - -sequenceDecs_decodeSync_bmi2_adjust_skip: - MOVQ 144(CX), R12 - MOVQ R12, 152(CX) - MOVQ R14, 144(CX) - MOVQ R14, R13 - -sequenceDecs_decodeSync_bmi2_after_adjust: - MOVQ R13, 8(SP) - - // Check values - MOVQ 16(SP), CX - MOVQ 24(SP), R12 - LEAQ (CX)(R12*1), R14 - MOVQ s+0(FP), R15 - ADDQ R14, 256(R15) - MOVQ ctx+16(FP), R14 - SUBQ R12, 104(R14) - JS error_not_enough_literals - CMPQ CX, $0x00020002 - JA sequenceDecs_decodeSync_bmi2_error_match_len_too_big - TESTQ R13, R13 - JNZ sequenceDecs_decodeSync_bmi2_match_len_ofs_ok - TESTQ CX, CX - JNZ sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch - -sequenceDecs_decodeSync_bmi2_match_len_ofs_ok: - MOVQ 24(SP), CX - MOVQ 8(SP), R12 - MOVQ 16(SP), R13 - - // Check if we have enough space in s.out - LEAQ (CX)(R13*1), R14 - ADDQ R9, R14 - CMPQ R14, 32(SP) - JA error_not_enough_space - - // Copy literals - TESTQ CX, CX - JZ check_offset - XORQ R14, R14 - -copy_1: - MOVUPS (R10)(R14*1), X0 - MOVUPS X0, (R9)(R14*1) - ADDQ $0x10, R14 - CMPQ R14, CX - JB copy_1 - ADDQ CX, R10 - ADDQ CX, R9 - ADDQ CX, R11 - - // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) -check_offset: - MOVQ R11, CX - ADDQ 40(SP), CX - CMPQ R12, CX - JG error_match_off_too_big - CMPQ R12, 56(SP) - JG error_match_off_too_big - - // Copy match from history - MOVQ R12, CX - SUBQ R11, CX - JLS copy_match - MOVQ 48(SP), R14 - SUBQ CX, R14 - CMPQ R13, CX - JG copy_all_from_history - MOVQ R13, CX - SUBQ $0x10, CX - JB copy_4_small - -copy_4_loop: - MOVUPS (R14), X0 - MOVUPS X0, (R9) - ADDQ $0x10, R14 - ADDQ $0x10, R9 - SUBQ $0x10, CX - JAE copy_4_loop - LEAQ 16(R14)(CX*1), R14 - LEAQ 16(R9)(CX*1), R9 - MOVUPS -16(R14), X0 - MOVUPS X0, -16(R9) - JMP copy_4_end - -copy_4_small: - CMPQ R13, $0x03 - JE copy_4_move_3 - CMPQ R13, $0x08 - JB copy_4_move_4through7 - JMP copy_4_move_8through16 - -copy_4_move_3: - MOVW (R14), CX - MOVB 2(R14), R12 - MOVW CX, (R9) - MOVB R12, 2(R9) - ADDQ R13, R14 - ADDQ R13, R9 - JMP copy_4_end - -copy_4_move_4through7: - MOVL (R14), CX - MOVL -4(R14)(R13*1), R12 - MOVL CX, (R9) - MOVL R12, -4(R9)(R13*1) - ADDQ R13, R14 - ADDQ R13, R9 - JMP copy_4_end - -copy_4_move_8through16: - MOVQ (R14), CX - MOVQ -8(R14)(R13*1), R12 - MOVQ CX, (R9) - MOVQ R12, -8(R9)(R13*1) - ADDQ R13, R14 - ADDQ R13, R9 - -copy_4_end: - ADDQ R13, R11 - JMP handle_loop - JMP loop_finished - -copy_all_from_history: - MOVQ CX, R15 - SUBQ $0x10, R15 - JB copy_5_small - -copy_5_loop: - MOVUPS (R14), X0 - MOVUPS X0, (R9) - ADDQ $0x10, R14 - ADDQ $0x10, R9 - SUBQ $0x10, R15 - JAE copy_5_loop - LEAQ 16(R14)(R15*1), R14 - LEAQ 16(R9)(R15*1), R9 - MOVUPS -16(R14), X0 - MOVUPS X0, -16(R9) - JMP copy_5_end - -copy_5_small: - CMPQ CX, $0x03 - JE copy_5_move_3 - JB copy_5_move_1or2 - CMPQ CX, $0x08 - JB copy_5_move_4through7 - JMP copy_5_move_8through16 - -copy_5_move_1or2: - MOVB (R14), R15 - MOVB -1(R14)(CX*1), BP - MOVB R15, (R9) - MOVB BP, -1(R9)(CX*1) - ADDQ CX, R14 - ADDQ CX, R9 - JMP copy_5_end - -copy_5_move_3: - MOVW (R14), R15 - MOVB 2(R14), BP - MOVW R15, (R9) - MOVB BP, 2(R9) - ADDQ CX, R14 - ADDQ CX, R9 - JMP copy_5_end - -copy_5_move_4through7: - MOVL (R14), R15 - MOVL -4(R14)(CX*1), BP - MOVL R15, (R9) - MOVL BP, -4(R9)(CX*1) - ADDQ CX, R14 - ADDQ CX, R9 - JMP copy_5_end - -copy_5_move_8through16: - MOVQ (R14), R15 - MOVQ -8(R14)(CX*1), BP - MOVQ R15, (R9) - MOVQ BP, -8(R9)(CX*1) - ADDQ CX, R14 - ADDQ CX, R9 - -copy_5_end: - ADDQ CX, R11 - SUBQ CX, R13 - - // Copy match from the current buffer -copy_match: - MOVQ R9, CX - SUBQ R12, CX - - // ml <= mo - CMPQ R13, R12 - JA copy_overlapping_match - - // Copy non-overlapping match - ADDQ R13, R11 - MOVQ R9, R12 - ADDQ R13, R9 - -copy_2: - MOVUPS (CX), X0 - MOVUPS X0, (R12) - ADDQ $0x10, CX - ADDQ $0x10, R12 - SUBQ $0x10, R13 - JHI copy_2 - JMP handle_loop - - // Copy overlapping match -copy_overlapping_match: - ADDQ R13, R11 - -copy_slow_3: - MOVB (CX), R12 - MOVB R12, (R9) - INCQ CX - INCQ R9 - DECQ R13 - JNZ copy_slow_3 - -handle_loop: - MOVQ ctx+16(FP), CX - DECQ 96(CX) - JNS sequenceDecs_decodeSync_bmi2_main_loop - -loop_finished: - MOVQ br+8(FP), CX - MOVQ AX, 24(CX) - MOVB DL, 40(CX) - MOVQ BX, 32(CX) - - // Update the context - MOVQ ctx+16(FP), AX - MOVQ R11, 136(AX) - MOVQ 144(AX), CX - SUBQ CX, R10 - MOVQ R10, 168(AX) - - // Return success - MOVQ $0x00000000, ret+24(FP) - RET - - // Return with match length error -sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch: - MOVQ 16(SP), AX - MOVQ ctx+16(FP), CX - MOVQ AX, 216(CX) - MOVQ $0x00000001, ret+24(FP) - RET - - // Return with match too long error -sequenceDecs_decodeSync_bmi2_error_match_len_too_big: - MOVQ ctx+16(FP), AX - MOVQ 16(SP), CX - MOVQ CX, 216(AX) - MOVQ $0x00000002, ret+24(FP) - RET - - // Return with match offset too long error -error_match_off_too_big: - MOVQ ctx+16(FP), AX - MOVQ 8(SP), CX - MOVQ CX, 224(AX) - MOVQ R11, 136(AX) - MOVQ $0x00000003, ret+24(FP) - RET - - // Return with not enough literals error -error_not_enough_literals: - MOVQ ctx+16(FP), AX - MOVQ 24(SP), CX - MOVQ CX, 208(AX) - MOVQ $0x00000004, ret+24(FP) - RET - - // Return with overread error -error_overread: - MOVQ $0x00000006, ret+24(FP) - RET - - // Return with not enough output space error -error_not_enough_space: - MOVQ ctx+16(FP), AX - MOVQ 24(SP), CX - MOVQ CX, 208(AX) - MOVQ 16(SP), CX - MOVQ CX, 216(AX) - MOVQ R11, 136(AX) - MOVQ $0x00000005, ret+24(FP) - RET - -// func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int -// Requires: CMOV, SSE -TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 - MOVQ br+8(FP), CX - MOVQ 24(CX), DX - MOVBQZX 40(CX), BX - MOVQ (CX), AX - MOVQ 32(CX), SI - ADDQ SI, AX - MOVQ AX, (SP) - MOVQ ctx+16(FP), AX - MOVQ 72(AX), DI - MOVQ 80(AX), R8 - MOVQ 88(AX), R9 - XORQ CX, CX - MOVQ CX, 8(SP) - MOVQ CX, 16(SP) - MOVQ CX, 24(SP) - MOVQ 112(AX), R10 - MOVQ 128(AX), CX - MOVQ CX, 32(SP) - MOVQ 144(AX), R11 - MOVQ 136(AX), R12 - MOVQ 200(AX), CX - MOVQ CX, 56(SP) - MOVQ 176(AX), CX - MOVQ CX, 48(SP) - MOVQ 184(AX), AX - MOVQ AX, 40(SP) - MOVQ 40(SP), AX - ADDQ AX, 48(SP) - - // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) - ADDQ R10, 32(SP) - - // outBase += outPosition - ADDQ R12, R10 - -sequenceDecs_decodeSync_safe_amd64_main_loop: - MOVQ (SP), R13 - - // Fill bitreader to have enough for the offset and match length. - CMPQ SI, $0x08 - JL sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte - MOVQ BX, AX - SHRQ $0x03, AX - SUBQ AX, R13 - MOVQ (R13), DX - SUBQ AX, SI - ANDQ $0x07, BX - JMP sequenceDecs_decodeSync_safe_amd64_fill_end - -sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte: - CMPQ SI, $0x00 - JLE sequenceDecs_decodeSync_safe_amd64_fill_check_overread - CMPQ BX, $0x07 - JLE sequenceDecs_decodeSync_safe_amd64_fill_end - SHLQ $0x08, DX - SUBQ $0x01, R13 - SUBQ $0x01, SI - SUBQ $0x08, BX - MOVBQZX (R13), AX - ORQ AX, DX - JMP sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte - -sequenceDecs_decodeSync_safe_amd64_fill_check_overread: - CMPQ BX, $0x40 - JA error_overread - -sequenceDecs_decodeSync_safe_amd64_fill_end: - // Update offset - MOVQ R9, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - SHRQ $0x20, AX - TESTQ CX, CX - JZ sequenceDecs_decodeSync_safe_amd64_of_update_zero - ADDQ CX, BX - CMPQ BX, $0x40 - JA sequenceDecs_decodeSync_safe_amd64_of_update_zero - CMPQ CX, $0x40 - JAE sequenceDecs_decodeSync_safe_amd64_of_update_zero - NEGQ CX - SHRQ CL, R14 - ADDQ R14, AX - -sequenceDecs_decodeSync_safe_amd64_of_update_zero: - MOVQ AX, 8(SP) - - // Update match length - MOVQ R8, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - SHRQ $0x20, AX - TESTQ CX, CX - JZ sequenceDecs_decodeSync_safe_amd64_ml_update_zero - ADDQ CX, BX - CMPQ BX, $0x40 - JA sequenceDecs_decodeSync_safe_amd64_ml_update_zero - CMPQ CX, $0x40 - JAE sequenceDecs_decodeSync_safe_amd64_ml_update_zero - NEGQ CX - SHRQ CL, R14 - ADDQ R14, AX - -sequenceDecs_decodeSync_safe_amd64_ml_update_zero: - MOVQ AX, 16(SP) - - // Fill bitreader to have enough for the remaining - CMPQ SI, $0x08 - JL sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte - MOVQ BX, AX - SHRQ $0x03, AX - SUBQ AX, R13 - MOVQ (R13), DX - SUBQ AX, SI - ANDQ $0x07, BX - JMP sequenceDecs_decodeSync_safe_amd64_fill_2_end - -sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte: - CMPQ SI, $0x00 - JLE sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread - CMPQ BX, $0x07 - JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end - SHLQ $0x08, DX - SUBQ $0x01, R13 - SUBQ $0x01, SI - SUBQ $0x08, BX - MOVBQZX (R13), AX - ORQ AX, DX - JMP sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte - -sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread: - CMPQ BX, $0x40 - JA error_overread - -sequenceDecs_decodeSync_safe_amd64_fill_2_end: - // Update literal length - MOVQ DI, AX - MOVQ BX, CX - MOVQ DX, R14 - SHLQ CL, R14 - MOVB AH, CL - SHRQ $0x20, AX - TESTQ CX, CX - JZ sequenceDecs_decodeSync_safe_amd64_ll_update_zero - ADDQ CX, BX - CMPQ BX, $0x40 - JA sequenceDecs_decodeSync_safe_amd64_ll_update_zero - CMPQ CX, $0x40 - JAE sequenceDecs_decodeSync_safe_amd64_ll_update_zero - NEGQ CX - SHRQ CL, R14 - ADDQ R14, AX - -sequenceDecs_decodeSync_safe_amd64_ll_update_zero: - MOVQ AX, 24(SP) - - // Fill bitreader for state updates - MOVQ R13, (SP) - MOVQ R9, AX - SHRQ $0x08, AX - MOVBQZX AL, AX - MOVQ ctx+16(FP), CX - CMPQ 96(CX), $0x00 - JZ sequenceDecs_decodeSync_safe_amd64_skip_update - - // Update Literal Length State - MOVBQZX DI, R13 - SHRL $0x10, DI - LEAQ (BX)(R13*1), CX - MOVQ DX, R14 - MOVQ CX, BX - ROLQ CL, R14 - MOVL $0x00000001, R15 - MOVB R13, CL - SHLL CL, R15 - DECL R15 - ANDQ R15, R14 - ADDQ R14, DI - - // Load ctx.llTable - MOVQ ctx+16(FP), CX - MOVQ (CX), CX - MOVQ (CX)(DI*8), DI - - // Update Match Length State - MOVBQZX R8, R13 - SHRL $0x10, R8 - LEAQ (BX)(R13*1), CX - MOVQ DX, R14 - MOVQ CX, BX - ROLQ CL, R14 - MOVL $0x00000001, R15 - MOVB R13, CL - SHLL CL, R15 - DECL R15 - ANDQ R15, R14 - ADDQ R14, R8 - - // Load ctx.mlTable - MOVQ ctx+16(FP), CX - MOVQ 24(CX), CX - MOVQ (CX)(R8*8), R8 - - // Update Offset State - MOVBQZX R9, R13 - SHRL $0x10, R9 - LEAQ (BX)(R13*1), CX - MOVQ DX, R14 - MOVQ CX, BX - ROLQ CL, R14 - MOVL $0x00000001, R15 - MOVB R13, CL - SHLL CL, R15 - DECL R15 - ANDQ R15, R14 - ADDQ R14, R9 - - // Load ctx.ofTable - MOVQ ctx+16(FP), CX - MOVQ 48(CX), CX - MOVQ (CX)(R9*8), R9 - -sequenceDecs_decodeSync_safe_amd64_skip_update: - // Adjust offset - MOVQ s+0(FP), CX - MOVQ 8(SP), R13 - CMPQ AX, $0x01 - JBE sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0 - MOVUPS 144(CX), X0 - MOVQ R13, 144(CX) - MOVUPS X0, 152(CX) - JMP sequenceDecs_decodeSync_safe_amd64_after_adjust - -sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0: - CMPQ 24(SP), $0x00000000 - JNE sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero - INCQ R13 - JMP sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero - -sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero: - TESTQ R13, R13 - JNZ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero - MOVQ 144(CX), R13 - JMP sequenceDecs_decodeSync_safe_amd64_after_adjust - -sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero: - MOVQ R13, AX - XORQ R14, R14 - MOVQ $-1, R15 - CMPQ R13, $0x03 - CMOVQEQ R14, AX - CMOVQEQ R15, R14 - ADDQ 144(CX)(AX*8), R14 - JNZ sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid - MOVQ $0x00000001, R14 - -sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid: - CMPQ R13, $0x01 - JZ sequenceDecs_decodeSync_safe_amd64_adjust_skip - MOVQ 152(CX), AX - MOVQ AX, 160(CX) - -sequenceDecs_decodeSync_safe_amd64_adjust_skip: - MOVQ 144(CX), AX - MOVQ AX, 152(CX) - MOVQ R14, 144(CX) - MOVQ R14, R13 - -sequenceDecs_decodeSync_safe_amd64_after_adjust: - MOVQ R13, 8(SP) - - // Check values - MOVQ 16(SP), AX - MOVQ 24(SP), CX - LEAQ (AX)(CX*1), R14 - MOVQ s+0(FP), R15 - ADDQ R14, 256(R15) - MOVQ ctx+16(FP), R14 - SUBQ CX, 104(R14) - JS error_not_enough_literals - CMPQ AX, $0x00020002 - JA sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big - TESTQ R13, R13 - JNZ sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok - TESTQ AX, AX - JNZ sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch - -sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok: - MOVQ 24(SP), AX - MOVQ 8(SP), CX - MOVQ 16(SP), R13 - - // Check if we have enough space in s.out - LEAQ (AX)(R13*1), R14 - ADDQ R10, R14 - CMPQ R14, 32(SP) - JA error_not_enough_space - - // Copy literals - TESTQ AX, AX - JZ check_offset - MOVQ AX, R14 - SUBQ $0x10, R14 - JB copy_1_small - -copy_1_loop: - MOVUPS (R11), X0 - MOVUPS X0, (R10) - ADDQ $0x10, R11 - ADDQ $0x10, R10 - SUBQ $0x10, R14 - JAE copy_1_loop - LEAQ 16(R11)(R14*1), R11 - LEAQ 16(R10)(R14*1), R10 - MOVUPS -16(R11), X0 - MOVUPS X0, -16(R10) - JMP copy_1_end - -copy_1_small: - CMPQ AX, $0x03 - JE copy_1_move_3 - JB copy_1_move_1or2 - CMPQ AX, $0x08 - JB copy_1_move_4through7 - JMP copy_1_move_8through16 - -copy_1_move_1or2: - MOVB (R11), R14 - MOVB -1(R11)(AX*1), R15 - MOVB R14, (R10) - MOVB R15, -1(R10)(AX*1) - ADDQ AX, R11 - ADDQ AX, R10 - JMP copy_1_end - -copy_1_move_3: - MOVW (R11), R14 - MOVB 2(R11), R15 - MOVW R14, (R10) - MOVB R15, 2(R10) - ADDQ AX, R11 - ADDQ AX, R10 - JMP copy_1_end - -copy_1_move_4through7: - MOVL (R11), R14 - MOVL -4(R11)(AX*1), R15 - MOVL R14, (R10) - MOVL R15, -4(R10)(AX*1) - ADDQ AX, R11 - ADDQ AX, R10 - JMP copy_1_end - -copy_1_move_8through16: - MOVQ (R11), R14 - MOVQ -8(R11)(AX*1), R15 - MOVQ R14, (R10) - MOVQ R15, -8(R10)(AX*1) - ADDQ AX, R11 - ADDQ AX, R10 - -copy_1_end: - ADDQ AX, R12 - - // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) -check_offset: - MOVQ R12, AX - ADDQ 40(SP), AX - CMPQ CX, AX - JG error_match_off_too_big - CMPQ CX, 56(SP) - JG error_match_off_too_big - - // Copy match from history - MOVQ CX, AX - SUBQ R12, AX - JLS copy_match - MOVQ 48(SP), R14 - SUBQ AX, R14 - CMPQ R13, AX - JG copy_all_from_history - MOVQ R13, AX - SUBQ $0x10, AX - JB copy_4_small - -copy_4_loop: - MOVUPS (R14), X0 - MOVUPS X0, (R10) - ADDQ $0x10, R14 - ADDQ $0x10, R10 - SUBQ $0x10, AX - JAE copy_4_loop - LEAQ 16(R14)(AX*1), R14 - LEAQ 16(R10)(AX*1), R10 - MOVUPS -16(R14), X0 - MOVUPS X0, -16(R10) - JMP copy_4_end - -copy_4_small: - CMPQ R13, $0x03 - JE copy_4_move_3 - CMPQ R13, $0x08 - JB copy_4_move_4through7 - JMP copy_4_move_8through16 - -copy_4_move_3: - MOVW (R14), AX - MOVB 2(R14), CL - MOVW AX, (R10) - MOVB CL, 2(R10) - ADDQ R13, R14 - ADDQ R13, R10 - JMP copy_4_end - -copy_4_move_4through7: - MOVL (R14), AX - MOVL -4(R14)(R13*1), CX - MOVL AX, (R10) - MOVL CX, -4(R10)(R13*1) - ADDQ R13, R14 - ADDQ R13, R10 - JMP copy_4_end - -copy_4_move_8through16: - MOVQ (R14), AX - MOVQ -8(R14)(R13*1), CX - MOVQ AX, (R10) - MOVQ CX, -8(R10)(R13*1) - ADDQ R13, R14 - ADDQ R13, R10 - -copy_4_end: - ADDQ R13, R12 - JMP handle_loop - JMP loop_finished - -copy_all_from_history: - MOVQ AX, R15 - SUBQ $0x10, R15 - JB copy_5_small - -copy_5_loop: - MOVUPS (R14), X0 - MOVUPS X0, (R10) - ADDQ $0x10, R14 - ADDQ $0x10, R10 - SUBQ $0x10, R15 - JAE copy_5_loop - LEAQ 16(R14)(R15*1), R14 - LEAQ 16(R10)(R15*1), R10 - MOVUPS -16(R14), X0 - MOVUPS X0, -16(R10) - JMP copy_5_end - -copy_5_small: - CMPQ AX, $0x03 - JE copy_5_move_3 - JB copy_5_move_1or2 - CMPQ AX, $0x08 - JB copy_5_move_4through7 - JMP copy_5_move_8through16 - -copy_5_move_1or2: - MOVB (R14), R15 - MOVB -1(R14)(AX*1), BP - MOVB R15, (R10) - MOVB BP, -1(R10)(AX*1) - ADDQ AX, R14 - ADDQ AX, R10 - JMP copy_5_end - -copy_5_move_3: - MOVW (R14), R15 - MOVB 2(R14), BP - MOVW R15, (R10) - MOVB BP, 2(R10) - ADDQ AX, R14 - ADDQ AX, R10 - JMP copy_5_end - -copy_5_move_4through7: - MOVL (R14), R15 - MOVL -4(R14)(AX*1), BP - MOVL R15, (R10) - MOVL BP, -4(R10)(AX*1) - ADDQ AX, R14 - ADDQ AX, R10 - JMP copy_5_end - -copy_5_move_8through16: - MOVQ (R14), R15 - MOVQ -8(R14)(AX*1), BP - MOVQ R15, (R10) - MOVQ BP, -8(R10)(AX*1) - ADDQ AX, R14 - ADDQ AX, R10 - -copy_5_end: - ADDQ AX, R12 - SUBQ AX, R13 - - // Copy match from the current buffer -copy_match: - MOVQ R10, AX - SUBQ CX, AX - - // ml <= mo - CMPQ R13, CX - JA copy_overlapping_match - - // Copy non-overlapping match - ADDQ R13, R12 - MOVQ R13, CX - SUBQ $0x10, CX - JB copy_2_small - -copy_2_loop: - MOVUPS (AX), X0 - MOVUPS X0, (R10) - ADDQ $0x10, AX - ADDQ $0x10, R10 - SUBQ $0x10, CX - JAE copy_2_loop - LEAQ 16(AX)(CX*1), AX - LEAQ 16(R10)(CX*1), R10 - MOVUPS -16(AX), X0 - MOVUPS X0, -16(R10) - JMP copy_2_end - -copy_2_small: - CMPQ R13, $0x03 - JE copy_2_move_3 - JB copy_2_move_1or2 - CMPQ R13, $0x08 - JB copy_2_move_4through7 - JMP copy_2_move_8through16 - -copy_2_move_1or2: - MOVB (AX), CL - MOVB -1(AX)(R13*1), R14 - MOVB CL, (R10) - MOVB R14, -1(R10)(R13*1) - ADDQ R13, AX - ADDQ R13, R10 - JMP copy_2_end - -copy_2_move_3: - MOVW (AX), CX - MOVB 2(AX), R14 - MOVW CX, (R10) - MOVB R14, 2(R10) - ADDQ R13, AX - ADDQ R13, R10 - JMP copy_2_end - -copy_2_move_4through7: - MOVL (AX), CX - MOVL -4(AX)(R13*1), R14 - MOVL CX, (R10) - MOVL R14, -4(R10)(R13*1) - ADDQ R13, AX - ADDQ R13, R10 - JMP copy_2_end - -copy_2_move_8through16: - MOVQ (AX), CX - MOVQ -8(AX)(R13*1), R14 - MOVQ CX, (R10) - MOVQ R14, -8(R10)(R13*1) - ADDQ R13, AX - ADDQ R13, R10 - -copy_2_end: - JMP handle_loop - - // Copy overlapping match -copy_overlapping_match: - ADDQ R13, R12 - -copy_slow_3: - MOVB (AX), CL - MOVB CL, (R10) - INCQ AX - INCQ R10 - DECQ R13 - JNZ copy_slow_3 - -handle_loop: - MOVQ ctx+16(FP), AX - DECQ 96(AX) - JNS sequenceDecs_decodeSync_safe_amd64_main_loop - -loop_finished: - MOVQ br+8(FP), AX - MOVQ DX, 24(AX) - MOVB BL, 40(AX) - MOVQ SI, 32(AX) - - // Update the context - MOVQ ctx+16(FP), AX - MOVQ R12, 136(AX) - MOVQ 144(AX), CX - SUBQ CX, R11 - MOVQ R11, 168(AX) - - // Return success - MOVQ $0x00000000, ret+24(FP) - RET - - // Return with match length error -sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch: - MOVQ 16(SP), AX - MOVQ ctx+16(FP), CX - MOVQ AX, 216(CX) - MOVQ $0x00000001, ret+24(FP) - RET - - // Return with match too long error -sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big: - MOVQ ctx+16(FP), AX - MOVQ 16(SP), CX - MOVQ CX, 216(AX) - MOVQ $0x00000002, ret+24(FP) - RET - - // Return with match offset too long error -error_match_off_too_big: - MOVQ ctx+16(FP), AX - MOVQ 8(SP), CX - MOVQ CX, 224(AX) - MOVQ R12, 136(AX) - MOVQ $0x00000003, ret+24(FP) - RET - - // Return with not enough literals error -error_not_enough_literals: - MOVQ ctx+16(FP), AX - MOVQ 24(SP), CX - MOVQ CX, 208(AX) - MOVQ $0x00000004, ret+24(FP) - RET - - // Return with overread error -error_overread: - MOVQ $0x00000006, ret+24(FP) - RET - - // Return with not enough output space error -error_not_enough_space: - MOVQ ctx+16(FP), AX - MOVQ 24(SP), CX - MOVQ CX, 208(AX) - MOVQ 16(SP), CX - MOVQ CX, 216(AX) - MOVQ R12, 136(AX) - MOVQ $0x00000005, ret+24(FP) - RET - -// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int -// Requires: BMI, BMI2, CMOV, SSE -TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 - MOVQ br+8(FP), BX - MOVQ 24(BX), AX - MOVBQZX 40(BX), DX - MOVQ (BX), CX - MOVQ 32(BX), BX - ADDQ BX, CX - MOVQ CX, (SP) - MOVQ ctx+16(FP), CX - MOVQ 72(CX), SI - MOVQ 80(CX), DI - MOVQ 88(CX), R8 - XORQ R9, R9 - MOVQ R9, 8(SP) - MOVQ R9, 16(SP) - MOVQ R9, 24(SP) - MOVQ 112(CX), R9 - MOVQ 128(CX), R10 - MOVQ R10, 32(SP) - MOVQ 144(CX), R10 - MOVQ 136(CX), R11 - MOVQ 200(CX), R12 - MOVQ R12, 56(SP) - MOVQ 176(CX), R12 - MOVQ R12, 48(SP) - MOVQ 184(CX), CX - MOVQ CX, 40(SP) - MOVQ 40(SP), CX - ADDQ CX, 48(SP) - - // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) - ADDQ R9, 32(SP) - - // outBase += outPosition - ADDQ R11, R9 - -sequenceDecs_decodeSync_safe_bmi2_main_loop: - MOVQ (SP), R12 - - // Fill bitreader to have enough for the offset and match length. - CMPQ BX, $0x08 - JL sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte - MOVQ DX, CX - SHRQ $0x03, CX - SUBQ CX, R12 - MOVQ (R12), AX - SUBQ CX, BX - ANDQ $0x07, DX - JMP sequenceDecs_decodeSync_safe_bmi2_fill_end - -sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte: - CMPQ BX, $0x00 - JLE sequenceDecs_decodeSync_safe_bmi2_fill_check_overread - CMPQ DX, $0x07 - JLE sequenceDecs_decodeSync_safe_bmi2_fill_end - SHLQ $0x08, AX - SUBQ $0x01, R12 - SUBQ $0x01, BX - SUBQ $0x08, DX - MOVBQZX (R12), CX - ORQ CX, AX - JMP sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte - -sequenceDecs_decodeSync_safe_bmi2_fill_check_overread: - CMPQ DX, $0x40 - JA error_overread - -sequenceDecs_decodeSync_safe_bmi2_fill_end: - // Update offset - MOVQ $0x00000808, CX - BEXTRQ CX, R8, R13 - MOVQ AX, R14 - LEAQ (DX)(R13*1), CX - ROLQ CL, R14 - BZHIQ R13, R14, R14 - MOVQ CX, DX - MOVQ R8, CX - SHRQ $0x20, CX - ADDQ R14, CX - MOVQ CX, 8(SP) - - // Update match length - MOVQ $0x00000808, CX - BEXTRQ CX, DI, R13 - MOVQ AX, R14 - LEAQ (DX)(R13*1), CX - ROLQ CL, R14 - BZHIQ R13, R14, R14 - MOVQ CX, DX - MOVQ DI, CX - SHRQ $0x20, CX - ADDQ R14, CX - MOVQ CX, 16(SP) - - // Fill bitreader to have enough for the remaining - CMPQ BX, $0x08 - JL sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte - MOVQ DX, CX - SHRQ $0x03, CX - SUBQ CX, R12 - MOVQ (R12), AX - SUBQ CX, BX - ANDQ $0x07, DX - JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_end - -sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte: - CMPQ BX, $0x00 - JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread - CMPQ DX, $0x07 - JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end - SHLQ $0x08, AX - SUBQ $0x01, R12 - SUBQ $0x01, BX - SUBQ $0x08, DX - MOVBQZX (R12), CX - ORQ CX, AX - JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte - -sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread: - CMPQ DX, $0x40 - JA error_overread - -sequenceDecs_decodeSync_safe_bmi2_fill_2_end: - // Update literal length - MOVQ $0x00000808, CX - BEXTRQ CX, SI, R13 - MOVQ AX, R14 - LEAQ (DX)(R13*1), CX - ROLQ CL, R14 - BZHIQ R13, R14, R14 - MOVQ CX, DX - MOVQ SI, CX - SHRQ $0x20, CX - ADDQ R14, CX - MOVQ CX, 24(SP) - - // Fill bitreader for state updates - MOVQ R12, (SP) - MOVQ $0x00000808, CX - BEXTRQ CX, R8, R12 - MOVQ ctx+16(FP), CX - CMPQ 96(CX), $0x00 - JZ sequenceDecs_decodeSync_safe_bmi2_skip_update - LEAQ (SI)(DI*1), R13 - ADDQ R8, R13 - MOVBQZX R13, R13 - LEAQ (DX)(R13*1), CX - MOVQ AX, R14 - MOVQ CX, DX - ROLQ CL, R14 - BZHIQ R13, R14, R14 - - // Update Offset State - BZHIQ R8, R14, CX - SHRXQ R8, R14, R14 - SHRL $0x10, R8 - ADDQ CX, R8 - - // Load ctx.ofTable - MOVQ ctx+16(FP), CX - MOVQ 48(CX), CX - MOVQ (CX)(R8*8), R8 - - // Update Match Length State - BZHIQ DI, R14, CX - SHRXQ DI, R14, R14 - SHRL $0x10, DI - ADDQ CX, DI - - // Load ctx.mlTable - MOVQ ctx+16(FP), CX - MOVQ 24(CX), CX - MOVQ (CX)(DI*8), DI - - // Update Literal Length State - BZHIQ SI, R14, CX - SHRL $0x10, SI - ADDQ CX, SI - - // Load ctx.llTable - MOVQ ctx+16(FP), CX - MOVQ (CX), CX - MOVQ (CX)(SI*8), SI - -sequenceDecs_decodeSync_safe_bmi2_skip_update: - // Adjust offset - MOVQ s+0(FP), CX - MOVQ 8(SP), R13 - CMPQ R12, $0x01 - JBE sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0 - MOVUPS 144(CX), X0 - MOVQ R13, 144(CX) - MOVUPS X0, 152(CX) - JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust - -sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0: - CMPQ 24(SP), $0x00000000 - JNE sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero - INCQ R13 - JMP sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero - -sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero: - TESTQ R13, R13 - JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero - MOVQ 144(CX), R13 - JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust - -sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero: - MOVQ R13, R12 - XORQ R14, R14 - MOVQ $-1, R15 - CMPQ R13, $0x03 - CMOVQEQ R14, R12 - CMOVQEQ R15, R14 - ADDQ 144(CX)(R12*8), R14 - JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid - MOVQ $0x00000001, R14 - -sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid: - CMPQ R13, $0x01 - JZ sequenceDecs_decodeSync_safe_bmi2_adjust_skip - MOVQ 152(CX), R12 - MOVQ R12, 160(CX) - -sequenceDecs_decodeSync_safe_bmi2_adjust_skip: - MOVQ 144(CX), R12 - MOVQ R12, 152(CX) - MOVQ R14, 144(CX) - MOVQ R14, R13 - -sequenceDecs_decodeSync_safe_bmi2_after_adjust: - MOVQ R13, 8(SP) - - // Check values - MOVQ 16(SP), CX - MOVQ 24(SP), R12 - LEAQ (CX)(R12*1), R14 - MOVQ s+0(FP), R15 - ADDQ R14, 256(R15) - MOVQ ctx+16(FP), R14 - SUBQ R12, 104(R14) - JS error_not_enough_literals - CMPQ CX, $0x00020002 - JA sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big - TESTQ R13, R13 - JNZ sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok - TESTQ CX, CX - JNZ sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch - -sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok: - MOVQ 24(SP), CX - MOVQ 8(SP), R12 - MOVQ 16(SP), R13 - - // Check if we have enough space in s.out - LEAQ (CX)(R13*1), R14 - ADDQ R9, R14 - CMPQ R14, 32(SP) - JA error_not_enough_space - - // Copy literals - TESTQ CX, CX - JZ check_offset - MOVQ CX, R14 - SUBQ $0x10, R14 - JB copy_1_small - -copy_1_loop: - MOVUPS (R10), X0 - MOVUPS X0, (R9) - ADDQ $0x10, R10 - ADDQ $0x10, R9 - SUBQ $0x10, R14 - JAE copy_1_loop - LEAQ 16(R10)(R14*1), R10 - LEAQ 16(R9)(R14*1), R9 - MOVUPS -16(R10), X0 - MOVUPS X0, -16(R9) - JMP copy_1_end - -copy_1_small: - CMPQ CX, $0x03 - JE copy_1_move_3 - JB copy_1_move_1or2 - CMPQ CX, $0x08 - JB copy_1_move_4through7 - JMP copy_1_move_8through16 - -copy_1_move_1or2: - MOVB (R10), R14 - MOVB -1(R10)(CX*1), R15 - MOVB R14, (R9) - MOVB R15, -1(R9)(CX*1) - ADDQ CX, R10 - ADDQ CX, R9 - JMP copy_1_end - -copy_1_move_3: - MOVW (R10), R14 - MOVB 2(R10), R15 - MOVW R14, (R9) - MOVB R15, 2(R9) - ADDQ CX, R10 - ADDQ CX, R9 - JMP copy_1_end - -copy_1_move_4through7: - MOVL (R10), R14 - MOVL -4(R10)(CX*1), R15 - MOVL R14, (R9) - MOVL R15, -4(R9)(CX*1) - ADDQ CX, R10 - ADDQ CX, R9 - JMP copy_1_end - -copy_1_move_8through16: - MOVQ (R10), R14 - MOVQ -8(R10)(CX*1), R15 - MOVQ R14, (R9) - MOVQ R15, -8(R9)(CX*1) - ADDQ CX, R10 - ADDQ CX, R9 - -copy_1_end: - ADDQ CX, R11 - - // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) -check_offset: - MOVQ R11, CX - ADDQ 40(SP), CX - CMPQ R12, CX - JG error_match_off_too_big - CMPQ R12, 56(SP) - JG error_match_off_too_big - - // Copy match from history - MOVQ R12, CX - SUBQ R11, CX - JLS copy_match - MOVQ 48(SP), R14 - SUBQ CX, R14 - CMPQ R13, CX - JG copy_all_from_history - MOVQ R13, CX - SUBQ $0x10, CX - JB copy_4_small - -copy_4_loop: - MOVUPS (R14), X0 - MOVUPS X0, (R9) - ADDQ $0x10, R14 - ADDQ $0x10, R9 - SUBQ $0x10, CX - JAE copy_4_loop - LEAQ 16(R14)(CX*1), R14 - LEAQ 16(R9)(CX*1), R9 - MOVUPS -16(R14), X0 - MOVUPS X0, -16(R9) - JMP copy_4_end - -copy_4_small: - CMPQ R13, $0x03 - JE copy_4_move_3 - CMPQ R13, $0x08 - JB copy_4_move_4through7 - JMP copy_4_move_8through16 - -copy_4_move_3: - MOVW (R14), CX - MOVB 2(R14), R12 - MOVW CX, (R9) - MOVB R12, 2(R9) - ADDQ R13, R14 - ADDQ R13, R9 - JMP copy_4_end - -copy_4_move_4through7: - MOVL (R14), CX - MOVL -4(R14)(R13*1), R12 - MOVL CX, (R9) - MOVL R12, -4(R9)(R13*1) - ADDQ R13, R14 - ADDQ R13, R9 - JMP copy_4_end - -copy_4_move_8through16: - MOVQ (R14), CX - MOVQ -8(R14)(R13*1), R12 - MOVQ CX, (R9) - MOVQ R12, -8(R9)(R13*1) - ADDQ R13, R14 - ADDQ R13, R9 - -copy_4_end: - ADDQ R13, R11 - JMP handle_loop - JMP loop_finished - -copy_all_from_history: - MOVQ CX, R15 - SUBQ $0x10, R15 - JB copy_5_small - -copy_5_loop: - MOVUPS (R14), X0 - MOVUPS X0, (R9) - ADDQ $0x10, R14 - ADDQ $0x10, R9 - SUBQ $0x10, R15 - JAE copy_5_loop - LEAQ 16(R14)(R15*1), R14 - LEAQ 16(R9)(R15*1), R9 - MOVUPS -16(R14), X0 - MOVUPS X0, -16(R9) - JMP copy_5_end - -copy_5_small: - CMPQ CX, $0x03 - JE copy_5_move_3 - JB copy_5_move_1or2 - CMPQ CX, $0x08 - JB copy_5_move_4through7 - JMP copy_5_move_8through16 - -copy_5_move_1or2: - MOVB (R14), R15 - MOVB -1(R14)(CX*1), BP - MOVB R15, (R9) - MOVB BP, -1(R9)(CX*1) - ADDQ CX, R14 - ADDQ CX, R9 - JMP copy_5_end - -copy_5_move_3: - MOVW (R14), R15 - MOVB 2(R14), BP - MOVW R15, (R9) - MOVB BP, 2(R9) - ADDQ CX, R14 - ADDQ CX, R9 - JMP copy_5_end - -copy_5_move_4through7: - MOVL (R14), R15 - MOVL -4(R14)(CX*1), BP - MOVL R15, (R9) - MOVL BP, -4(R9)(CX*1) - ADDQ CX, R14 - ADDQ CX, R9 - JMP copy_5_end - -copy_5_move_8through16: - MOVQ (R14), R15 - MOVQ -8(R14)(CX*1), BP - MOVQ R15, (R9) - MOVQ BP, -8(R9)(CX*1) - ADDQ CX, R14 - ADDQ CX, R9 - -copy_5_end: - ADDQ CX, R11 - SUBQ CX, R13 - - // Copy match from the current buffer -copy_match: - MOVQ R9, CX - SUBQ R12, CX - - // ml <= mo - CMPQ R13, R12 - JA copy_overlapping_match - - // Copy non-overlapping match - ADDQ R13, R11 - MOVQ R13, R12 - SUBQ $0x10, R12 - JB copy_2_small - -copy_2_loop: - MOVUPS (CX), X0 - MOVUPS X0, (R9) - ADDQ $0x10, CX - ADDQ $0x10, R9 - SUBQ $0x10, R12 - JAE copy_2_loop - LEAQ 16(CX)(R12*1), CX - LEAQ 16(R9)(R12*1), R9 - MOVUPS -16(CX), X0 - MOVUPS X0, -16(R9) - JMP copy_2_end - -copy_2_small: - CMPQ R13, $0x03 - JE copy_2_move_3 - JB copy_2_move_1or2 - CMPQ R13, $0x08 - JB copy_2_move_4through7 - JMP copy_2_move_8through16 - -copy_2_move_1or2: - MOVB (CX), R12 - MOVB -1(CX)(R13*1), R14 - MOVB R12, (R9) - MOVB R14, -1(R9)(R13*1) - ADDQ R13, CX - ADDQ R13, R9 - JMP copy_2_end - -copy_2_move_3: - MOVW (CX), R12 - MOVB 2(CX), R14 - MOVW R12, (R9) - MOVB R14, 2(R9) - ADDQ R13, CX - ADDQ R13, R9 - JMP copy_2_end - -copy_2_move_4through7: - MOVL (CX), R12 - MOVL -4(CX)(R13*1), R14 - MOVL R12, (R9) - MOVL R14, -4(R9)(R13*1) - ADDQ R13, CX - ADDQ R13, R9 - JMP copy_2_end - -copy_2_move_8through16: - MOVQ (CX), R12 - MOVQ -8(CX)(R13*1), R14 - MOVQ R12, (R9) - MOVQ R14, -8(R9)(R13*1) - ADDQ R13, CX - ADDQ R13, R9 - -copy_2_end: - JMP handle_loop - - // Copy overlapping match -copy_overlapping_match: - ADDQ R13, R11 - -copy_slow_3: - MOVB (CX), R12 - MOVB R12, (R9) - INCQ CX - INCQ R9 - DECQ R13 - JNZ copy_slow_3 - -handle_loop: - MOVQ ctx+16(FP), CX - DECQ 96(CX) - JNS sequenceDecs_decodeSync_safe_bmi2_main_loop - -loop_finished: - MOVQ br+8(FP), CX - MOVQ AX, 24(CX) - MOVB DL, 40(CX) - MOVQ BX, 32(CX) - - // Update the context - MOVQ ctx+16(FP), AX - MOVQ R11, 136(AX) - MOVQ 144(AX), CX - SUBQ CX, R10 - MOVQ R10, 168(AX) - - // Return success - MOVQ $0x00000000, ret+24(FP) - RET - - // Return with match length error -sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch: - MOVQ 16(SP), AX - MOVQ ctx+16(FP), CX - MOVQ AX, 216(CX) - MOVQ $0x00000001, ret+24(FP) - RET - - // Return with match too long error -sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big: - MOVQ ctx+16(FP), AX - MOVQ 16(SP), CX - MOVQ CX, 216(AX) - MOVQ $0x00000002, ret+24(FP) - RET - - // Return with match offset too long error -error_match_off_too_big: - MOVQ ctx+16(FP), AX - MOVQ 8(SP), CX - MOVQ CX, 224(AX) - MOVQ R11, 136(AX) - MOVQ $0x00000003, ret+24(FP) - RET - - // Return with not enough literals error -error_not_enough_literals: - MOVQ ctx+16(FP), AX - MOVQ 24(SP), CX - MOVQ CX, 208(AX) - MOVQ $0x00000004, ret+24(FP) - RET - - // Return with overread error -error_overread: - MOVQ $0x00000006, ret+24(FP) - RET - - // Return with not enough output space error -error_not_enough_space: - MOVQ ctx+16(FP), AX - MOVQ 24(SP), CX - MOVQ CX, 208(AX) - MOVQ 16(SP), CX - MOVQ CX, 216(AX) - MOVQ R11, 136(AX) - MOVQ $0x00000005, ret+24(FP) - RET diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go deleted file mode 100644 index 7cec2197c..000000000 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go +++ /dev/null @@ -1,237 +0,0 @@ -//go:build !amd64 || appengine || !gc || noasm -// +build !amd64 appengine !gc noasm - -package zstd - -import ( - "fmt" - "io" -) - -// decode sequences from the stream with the provided history but without dictionary. -func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { - return false, nil -} - -// decode sequences from the stream without the provided history. -func (s *sequenceDecs) decode(seqs []seqVals) error { - br := s.br - - // Grab full sizes tables, to avoid bounds checks. - llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize] - llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state - s.seqSize = 0 - litRemain := len(s.literals) - - maxBlockSize := maxCompressedBlockSize - if s.windowSize < maxBlockSize { - maxBlockSize = s.windowSize - } - for i := range seqs { - var ll, mo, ml int - if br.cursor > 4+((maxOffsetBits+16+16)>>3) { - // inlined function: - // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) - - // Final will not read from stream. - var llB, mlB, moB uint8 - ll, llB = llState.final() - ml, mlB = mlState.final() - mo, moB = ofState.final() - - // extra bits are stored in reverse order. - br.fillFast() - mo += br.getBits(moB) - if s.maxBits > 32 { - br.fillFast() - } - ml += br.getBits(mlB) - ll += br.getBits(llB) - - if moB > 1 { - s.prevOffset[2] = s.prevOffset[1] - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = mo - } else { - // mo = s.adjustOffset(mo, ll, moB) - // Inlined for rather big speedup - if ll == 0 { - // There is an exception though, when current sequence's literals_length = 0. - // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, - // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. - mo++ - } - - if mo == 0 { - mo = s.prevOffset[0] - } else { - var temp int - if mo == 3 { - temp = s.prevOffset[0] - 1 - } else { - temp = s.prevOffset[mo] - } - - if temp == 0 { - // 0 is not valid; input is corrupted; force offset to 1 - println("WARNING: temp was 0") - temp = 1 - } - - if mo != 1 { - s.prevOffset[2] = s.prevOffset[1] - } - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = temp - mo = temp - } - } - br.fillFast() - } else { - if br.overread() { - if debugDecoder { - printf("reading sequence %d, exceeded available data\n", i) - } - return io.ErrUnexpectedEOF - } - ll, mo, ml = s.next(br, llState, mlState, ofState) - br.fill() - } - - if debugSequences { - println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml) - } - // Evaluate. - // We might be doing this async, so do it early. - if mo == 0 && ml > 0 { - return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) - } - if ml > maxMatchLen { - return fmt.Errorf("match len (%d) bigger than max allowed length", ml) - } - s.seqSize += ll + ml - if s.seqSize > maxBlockSize { - return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) - } - litRemain -= ll - if litRemain < 0 { - return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll) - } - seqs[i] = seqVals{ - ll: ll, - ml: ml, - mo: mo, - } - if i == len(seqs)-1 { - // This is the last sequence, so we shouldn't update state. - break - } - - // Manually inlined, ~ 5-20% faster - // Update all 3 states at once. Approx 20% faster. - nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits() - if nBits == 0 { - llState = llTable[llState.newState()&maxTableMask] - mlState = mlTable[mlState.newState()&maxTableMask] - ofState = ofTable[ofState.newState()&maxTableMask] - } else { - bits := br.get32BitsFast(nBits) - lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31)) - llState = llTable[(llState.newState()+lowBits)&maxTableMask] - - lowBits = uint16(bits >> (ofState.nbBits() & 31)) - lowBits &= bitMask[mlState.nbBits()&15] - mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask] - - lowBits = uint16(bits) & bitMask[ofState.nbBits()&15] - ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask] - } - } - s.seqSize += litRemain - if s.seqSize > maxBlockSize { - return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) - } - err := br.close() - if err != nil { - printf("Closing sequences: %v, %+v\n", err, *br) - } - return err -} - -// executeSimple handles cases when a dictionary is not used. -func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error { - // Ensure we have enough output size... - if len(s.out)+s.seqSize > cap(s.out) { - addBytes := s.seqSize + len(s.out) - s.out = append(s.out, make([]byte, addBytes)...) - s.out = s.out[:len(s.out)-addBytes] - } - - if debugDecoder { - printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize) - } - - var t = len(s.out) - out := s.out[:t+s.seqSize] - - for _, seq := range seqs { - // Add literals - copy(out[t:], s.literals[:seq.ll]) - t += seq.ll - s.literals = s.literals[seq.ll:] - - // Malformed input - if seq.mo > t+len(hist) || seq.mo > s.windowSize { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist)) - } - - // Copy from history. - if v := seq.mo - t; v > 0 { - // v is the start position in history from end. - start := len(hist) - v - if seq.ml > v { - // Some goes into the current block. - // Copy remainder of history - copy(out[t:], hist[start:]) - t += v - seq.ml -= v - } else { - copy(out[t:], hist[start:start+seq.ml]) - t += seq.ml - continue - } - } - - // We must be in the current buffer now - if seq.ml > 0 { - start := t - seq.mo - if seq.ml <= t-start { - // No overlap - copy(out[t:], out[start:start+seq.ml]) - t += seq.ml - } else { - // Overlapping copy - // Extend destination slice and copy one byte at the time. - src := out[start : start+seq.ml] - dst := out[t:] - dst = dst[:len(src)] - t += len(src) - // Destination is the space we just added. - for i := range src { - dst[i] = src[i] - } - } - } - } - // Add final literals - copy(out[t:], s.literals) - if debugDecoder { - t += len(s.literals) - if t != len(out) { - panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) - } - } - s.out = out - - return nil -} diff --git a/vendor/github.com/klauspost/compress/zstd/seqenc.go b/vendor/github.com/klauspost/compress/zstd/seqenc.go deleted file mode 100644 index 65045eabd..000000000 --- a/vendor/github.com/klauspost/compress/zstd/seqenc.go +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import "math/bits" - -type seqCoders struct { - llEnc, ofEnc, mlEnc *fseEncoder - llPrev, ofPrev, mlPrev *fseEncoder -} - -// swap coders with another (block). -func (s *seqCoders) swap(other *seqCoders) { - *s, *other = *other, *s -} - -// setPrev will update the previous encoders to the actually used ones -// and make sure a fresh one is in the main slot. -func (s *seqCoders) setPrev(ll, ml, of *fseEncoder) { - compareSwap := func(used *fseEncoder, current, prev **fseEncoder) { - // We used the new one, more current to history and reuse the previous history - if *current == used { - *prev, *current = *current, *prev - c := *current - p := *prev - c.reUsed = false - p.reUsed = true - return - } - if used == *prev { - return - } - // Ensure we cannot reuse by accident - prevEnc := *prev - prevEnc.symbolLen = 0 - } - compareSwap(ll, &s.llEnc, &s.llPrev) - compareSwap(ml, &s.mlEnc, &s.mlPrev) - compareSwap(of, &s.ofEnc, &s.ofPrev) -} - -func highBit(val uint32) (n uint32) { - return uint32(bits.Len32(val) - 1) -} - -var llCodeTable = [64]byte{0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 16, 17, 17, 18, 18, 19, 19, - 20, 20, 20, 20, 21, 21, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, - 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24} - -// Up to 6 bits -const maxLLCode = 35 - -// llBitsTable translates from ll code to number of bits. -var llBitsTable = [maxLLCode + 1]byte{ - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 3, 3, - 4, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16} - -// llCode returns the code that represents the literal length requested. -func llCode(litLength uint32) uint8 { - const llDeltaCode = 19 - if litLength <= 63 { - return llCodeTable[litLength&63] - } - return uint8(highBit(litLength)) + llDeltaCode -} - -var mlCodeTable = [128]byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, - 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42} - -// Up to 6 bits -const maxMLCode = 52 - -// mlBitsTable translates from ml code to number of bits. -var mlBitsTable = [maxMLCode + 1]byte{ - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 3, 3, - 4, 4, 5, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 16} - -// note : mlBase = matchLength - MINMATCH; -// because it's the format it's stored in seqStore->sequences -func mlCode(mlBase uint32) uint8 { - const mlDeltaCode = 36 - if mlBase <= 127 { - return mlCodeTable[mlBase&127] - } - return uint8(highBit(mlBase)) + mlDeltaCode -} - -func ofCode(offset uint32) uint8 { - // A valid offset will always be > 0. - return uint8(bits.Len32(offset) - 1) -} diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go deleted file mode 100644 index a17381b8f..000000000 --- a/vendor/github.com/klauspost/compress/zstd/snappy.go +++ /dev/null @@ -1,434 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -import ( - "encoding/binary" - "errors" - "hash/crc32" - "io" - - "github.com/klauspost/compress/huff0" - snappy "github.com/klauspost/compress/internal/snapref" -) - -const ( - snappyTagLiteral = 0x00 - snappyTagCopy1 = 0x01 - snappyTagCopy2 = 0x02 - snappyTagCopy4 = 0x03 -) - -const ( - snappyChecksumSize = 4 - snappyMagicBody = "sNaPpY" - - // snappyMaxBlockSize is the maximum size of the input to encodeBlock. It is not - // part of the wire format per se, but some parts of the encoder assume - // that an offset fits into a uint16. - // - // Also, for the framing format (Writer type instead of Encode function), - // https://github.com/google/snappy/blob/master/framing_format.txt says - // that "the uncompressed data in a chunk must be no longer than 65536 - // bytes". - snappyMaxBlockSize = 65536 - - // snappyMaxEncodedLenOfMaxBlockSize equals MaxEncodedLen(snappyMaxBlockSize), but is - // hard coded to be a const instead of a variable, so that obufLen can also - // be a const. Their equivalence is confirmed by - // TestMaxEncodedLenOfMaxBlockSize. - snappyMaxEncodedLenOfMaxBlockSize = 76490 -) - -const ( - chunkTypeCompressedData = 0x00 - chunkTypeUncompressedData = 0x01 - chunkTypePadding = 0xfe - chunkTypeStreamIdentifier = 0xff -) - -var ( - // ErrSnappyCorrupt reports that the input is invalid. - ErrSnappyCorrupt = errors.New("snappy: corrupt input") - // ErrSnappyTooLarge reports that the uncompressed length is too large. - ErrSnappyTooLarge = errors.New("snappy: decoded block is too large") - // ErrSnappyUnsupported reports that the input isn't supported. - ErrSnappyUnsupported = errors.New("snappy: unsupported input") - - errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") -) - -// SnappyConverter can read SnappyConverter-compressed streams and convert them to zstd. -// Conversion is done by converting the stream directly from Snappy without intermediate -// full decoding. -// Therefore the compression ratio is much less than what can be done by a full decompression -// and compression, and a faulty Snappy stream may lead to a faulty Zstandard stream without -// any errors being generated. -// No CRC value is being generated and not all CRC values of the Snappy stream are checked. -// However, it provides really fast recompression of Snappy streams. -// The converter can be reused to avoid allocations, even after errors. -type SnappyConverter struct { - r io.Reader - err error - buf []byte - block *blockEnc -} - -// Convert the Snappy stream supplied in 'in' and write the zStandard stream to 'w'. -// If any error is detected on the Snappy stream it is returned. -// The number of bytes written is returned. -func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { - initPredefined() - r.err = nil - r.r = in - if r.block == nil { - r.block = &blockEnc{} - r.block.init() - } - r.block.initNewEncode() - if len(r.buf) != snappyMaxEncodedLenOfMaxBlockSize+snappyChecksumSize { - r.buf = make([]byte, snappyMaxEncodedLenOfMaxBlockSize+snappyChecksumSize) - } - r.block.litEnc.Reuse = huff0.ReusePolicyNone - var written int64 - var readHeader bool - { - header := frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0]) - - var n int - n, r.err = w.Write(header) - if r.err != nil { - return written, r.err - } - written += int64(n) - } - - for { - if !r.readFull(r.buf[:4], true) { - // Add empty last block - r.block.reset(nil) - r.block.last = true - err := r.block.encodeLits(r.block.literals, false) - if err != nil { - return written, err - } - n, err := w.Write(r.block.output) - if err != nil { - return written, err - } - written += int64(n) - - return written, r.err - } - chunkType := r.buf[0] - if !readHeader { - if chunkType != chunkTypeStreamIdentifier { - println("chunkType != chunkTypeStreamIdentifier", chunkType) - r.err = ErrSnappyCorrupt - return written, r.err - } - readHeader = true - } - chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 - if chunkLen > len(r.buf) { - println("chunkLen > len(r.buf)", chunkType) - r.err = ErrSnappyUnsupported - return written, r.err - } - - // The chunk types are specified at - // https://github.com/google/snappy/blob/master/framing_format.txt - switch chunkType { - case chunkTypeCompressedData: - // Section 4.2. Compressed data (chunk type 0x00). - if chunkLen < snappyChecksumSize { - println("chunkLen < snappyChecksumSize", chunkLen, snappyChecksumSize) - r.err = ErrSnappyCorrupt - return written, r.err - } - buf := r.buf[:chunkLen] - if !r.readFull(buf, false) { - return written, r.err - } - //checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - buf = buf[snappyChecksumSize:] - - n, hdr, err := snappyDecodedLen(buf) - if err != nil { - r.err = err - return written, r.err - } - buf = buf[hdr:] - if n > snappyMaxBlockSize { - println("n > snappyMaxBlockSize", n, snappyMaxBlockSize) - r.err = ErrSnappyCorrupt - return written, r.err - } - r.block.reset(nil) - r.block.pushOffsets() - if err := decodeSnappy(r.block, buf); err != nil { - r.err = err - return written, r.err - } - if r.block.size+r.block.extraLits != n { - printf("invalid size, want %d, got %d\n", n, r.block.size+r.block.extraLits) - r.err = ErrSnappyCorrupt - return written, r.err - } - err = r.block.encode(nil, false, false) - switch err { - case errIncompressible: - r.block.popOffsets() - r.block.reset(nil) - r.block.literals, err = snappy.Decode(r.block.literals[:n], r.buf[snappyChecksumSize:chunkLen]) - if err != nil { - return written, err - } - err = r.block.encodeLits(r.block.literals, false) - if err != nil { - return written, err - } - case nil: - default: - return written, err - } - - n, r.err = w.Write(r.block.output) - if r.err != nil { - return written, r.err - } - written += int64(n) - continue - case chunkTypeUncompressedData: - if debugEncoder { - println("Uncompressed, chunklen", chunkLen) - } - // Section 4.3. Uncompressed data (chunk type 0x01). - if chunkLen < snappyChecksumSize { - println("chunkLen < snappyChecksumSize", chunkLen, snappyChecksumSize) - r.err = ErrSnappyCorrupt - return written, r.err - } - r.block.reset(nil) - buf := r.buf[:snappyChecksumSize] - if !r.readFull(buf, false) { - return written, r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - // Read directly into r.decoded instead of via r.buf. - n := chunkLen - snappyChecksumSize - if n > snappyMaxBlockSize { - println("n > snappyMaxBlockSize", n, snappyMaxBlockSize) - r.err = ErrSnappyCorrupt - return written, r.err - } - r.block.literals = r.block.literals[:n] - if !r.readFull(r.block.literals, false) { - return written, r.err - } - if snappyCRC(r.block.literals) != checksum { - println("literals crc mismatch") - r.err = ErrSnappyCorrupt - return written, r.err - } - err := r.block.encodeLits(r.block.literals, false) - if err != nil { - return written, err - } - n, r.err = w.Write(r.block.output) - if r.err != nil { - return written, r.err - } - written += int64(n) - continue - - case chunkTypeStreamIdentifier: - if debugEncoder { - println("stream id", chunkLen, len(snappyMagicBody)) - } - // Section 4.1. Stream identifier (chunk type 0xff). - if chunkLen != len(snappyMagicBody) { - println("chunkLen != len(snappyMagicBody)", chunkLen, len(snappyMagicBody)) - r.err = ErrSnappyCorrupt - return written, r.err - } - if !r.readFull(r.buf[:len(snappyMagicBody)], false) { - return written, r.err - } - for i := 0; i < len(snappyMagicBody); i++ { - if r.buf[i] != snappyMagicBody[i] { - println("r.buf[i] != snappyMagicBody[i]", r.buf[i], snappyMagicBody[i], i) - r.err = ErrSnappyCorrupt - return written, r.err - } - } - continue - } - - if chunkType <= 0x7f { - // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). - println("chunkType <= 0x7f") - r.err = ErrSnappyUnsupported - return written, r.err - } - // Section 4.4 Padding (chunk type 0xfe). - // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). - if !r.readFull(r.buf[:chunkLen], false) { - return written, r.err - } - } -} - -// decodeSnappy writes the decoding of src to dst. It assumes that the varint-encoded -// length of the decompressed bytes has already been read. -func decodeSnappy(blk *blockEnc, src []byte) error { - //decodeRef(make([]byte, snappyMaxBlockSize), src) - var s, length int - lits := blk.extraLits - var offset uint32 - for s < len(src) { - switch src[s] & 0x03 { - case snappyTagLiteral: - x := uint32(src[s] >> 2) - switch { - case x < 60: - s++ - case x == 60: - s += 2 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - println("uint(s) > uint(len(src)", s, src) - return ErrSnappyCorrupt - } - x = uint32(src[s-1]) - case x == 61: - s += 3 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - println("uint(s) > uint(len(src)", s, src) - return ErrSnappyCorrupt - } - x = uint32(src[s-2]) | uint32(src[s-1])<<8 - case x == 62: - s += 4 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - println("uint(s) > uint(len(src)", s, src) - return ErrSnappyCorrupt - } - x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 - case x == 63: - s += 5 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - println("uint(s) > uint(len(src)", s, src) - return ErrSnappyCorrupt - } - x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 - } - if x > snappyMaxBlockSize { - println("x > snappyMaxBlockSize", x, snappyMaxBlockSize) - return ErrSnappyCorrupt - } - length = int(x) + 1 - if length <= 0 { - println("length <= 0 ", length) - - return errUnsupportedLiteralLength - } - //if length > snappyMaxBlockSize-d || uint32(length) > len(src)-s { - // return ErrSnappyCorrupt - //} - - blk.literals = append(blk.literals, src[s:s+length]...) - //println(length, "litLen") - lits += length - s += length - continue - - case snappyTagCopy1: - s += 2 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - println("uint(s) > uint(len(src)", s, len(src)) - return ErrSnappyCorrupt - } - length = 4 + int(src[s-2])>>2&0x7 - offset = uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]) - - case snappyTagCopy2: - s += 3 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - println("uint(s) > uint(len(src)", s, len(src)) - return ErrSnappyCorrupt - } - length = 1 + int(src[s-3])>>2 - offset = uint32(src[s-2]) | uint32(src[s-1])<<8 - - case snappyTagCopy4: - s += 5 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - println("uint(s) > uint(len(src)", s, len(src)) - return ErrSnappyCorrupt - } - length = 1 + int(src[s-5])>>2 - offset = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 - } - - if offset <= 0 || blk.size+lits < int(offset) /*|| length > len(blk)-d */ { - println("offset <= 0 || blk.size+lits < int(offset)", offset, blk.size+lits, int(offset), blk.size, lits) - - return ErrSnappyCorrupt - } - - // Check if offset is one of the recent offsets. - // Adjusts the output offset accordingly. - // Gives a tiny bit of compression, typically around 1%. - if false { - offset = blk.matchOffset(offset, uint32(lits)) - } else { - offset += 3 - } - - blk.sequences = append(blk.sequences, seq{ - litLen: uint32(lits), - offset: offset, - matchLen: uint32(length) - zstdMinMatch, - }) - blk.size += length + lits - lits = 0 - } - blk.extraLits = lits - return nil -} - -func (r *SnappyConverter) readFull(p []byte, allowEOF bool) (ok bool) { - if _, r.err = io.ReadFull(r.r, p); r.err != nil { - if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { - r.err = ErrSnappyCorrupt - } - return false - } - return true -} - -var crcTable = crc32.MakeTable(crc32.Castagnoli) - -// crc implements the checksum specified in section 3 of -// https://github.com/google/snappy/blob/master/framing_format.txt -func snappyCRC(b []byte) uint32 { - c := crc32.Update(0, crcTable, b) - return c>>15 | c<<17 + 0xa282ead8 -} - -// snappyDecodedLen returns the length of the decoded block and the number of bytes -// that the length header occupied. -func snappyDecodedLen(src []byte) (blockLen, headerLen int, err error) { - v, n := binary.Uvarint(src) - if n <= 0 || v > 0xffffffff { - return 0, 0, ErrSnappyCorrupt - } - - const wordSize = 32 << (^uint(0) >> 32 & 1) - if wordSize == 32 && v > 0x7fffffff { - return 0, 0, ErrSnappyTooLarge - } - return int(v), n, nil -} diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go deleted file mode 100644 index 29c15c8c4..000000000 --- a/vendor/github.com/klauspost/compress/zstd/zip.go +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. - -package zstd - -import ( - "errors" - "io" - "sync" -) - -// ZipMethodWinZip is the method for Zstandard compressed data inside Zip files for WinZip. -// See https://www.winzip.com/win/en/comp_info.html -const ZipMethodWinZip = 93 - -// ZipMethodPKWare is the original method number used by PKWARE to indicate Zstandard compression. -// Deprecated: This has been deprecated by PKWARE, use ZipMethodWinZip instead for compression. -// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT -const ZipMethodPKWare = 20 - -// zipReaderPool is the default reader pool. -var zipReaderPool = sync.Pool{New: func() interface{} { - z, err := NewReader(nil, WithDecoderLowmem(true), WithDecoderMaxWindow(128<<20), WithDecoderConcurrency(1)) - if err != nil { - panic(err) - } - return z -}} - -// newZipReader creates a pooled zip decompressor. -func newZipReader(opts ...DOption) func(r io.Reader) io.ReadCloser { - pool := &zipReaderPool - if len(opts) > 0 { - opts = append([]DOption{WithDecoderLowmem(true), WithDecoderMaxWindow(128 << 20)}, opts...) - // Force concurrency 1 - opts = append(opts, WithDecoderConcurrency(1)) - // Create our own pool - pool = &sync.Pool{} - } - return func(r io.Reader) io.ReadCloser { - dec, ok := pool.Get().(*Decoder) - if ok { - dec.Reset(r) - } else { - d, err := NewReader(r, opts...) - if err != nil { - panic(err) - } - dec = d - } - return &pooledZipReader{dec: dec, pool: pool} - } -} - -type pooledZipReader struct { - mu sync.Mutex // guards Close and Read - pool *sync.Pool - dec *Decoder -} - -func (r *pooledZipReader) Read(p []byte) (n int, err error) { - r.mu.Lock() - defer r.mu.Unlock() - if r.dec == nil { - return 0, errors.New("read after close or EOF") - } - dec, err := r.dec.Read(p) - if err == io.EOF { - r.dec.Reset(nil) - r.pool.Put(r.dec) - r.dec = nil - } - return dec, err -} - -func (r *pooledZipReader) Close() error { - r.mu.Lock() - defer r.mu.Unlock() - var err error - if r.dec != nil { - err = r.dec.Reset(nil) - r.pool.Put(r.dec) - r.dec = nil - } - return err -} - -type pooledZipWriter struct { - mu sync.Mutex // guards Close and Read - enc *Encoder - pool *sync.Pool -} - -func (w *pooledZipWriter) Write(p []byte) (n int, err error) { - w.mu.Lock() - defer w.mu.Unlock() - if w.enc == nil { - return 0, errors.New("Write after Close") - } - return w.enc.Write(p) -} - -func (w *pooledZipWriter) Close() error { - w.mu.Lock() - defer w.mu.Unlock() - var err error - if w.enc != nil { - err = w.enc.Close() - w.pool.Put(w.enc) - w.enc = nil - } - return err -} - -// ZipCompressor returns a compressor that can be registered with zip libraries. -// The provided encoder options will be used on all encodes. -func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) { - var pool sync.Pool - return func(w io.Writer) (io.WriteCloser, error) { - enc, ok := pool.Get().(*Encoder) - if ok { - enc.Reset(w) - } else { - var err error - enc, err = NewWriter(w, opts...) - if err != nil { - return nil, err - } - } - return &pooledZipWriter{enc: enc, pool: &pool}, nil - } -} - -// ZipDecompressor returns a decompressor that can be registered with zip libraries. -// See ZipCompressor for example. -// Options can be specified. WithDecoderConcurrency(1) is forced, -// and by default a 128MB maximum decompression window is specified. -// The window size can be overridden if required. -func ZipDecompressor(opts ...DOption) func(r io.Reader) io.ReadCloser { - return newZipReader(opts...) -} diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go deleted file mode 100644 index 6252b46ae..000000000 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ /dev/null @@ -1,126 +0,0 @@ -// Package zstd provides decompression of zstandard files. -// -// For advanced usage and examples, go to the README: https://github.com/klauspost/compress/tree/master/zstd#zstd -package zstd - -import ( - "bytes" - "errors" - "log" - "math" - - "github.com/klauspost/compress/internal/le" -) - -// enable debug printing -const debug = false - -// enable encoding debug printing -const debugEncoder = debug - -// enable decoding debug printing -const debugDecoder = debug - -// Enable extra assertions. -const debugAsserts = debug || false - -// print sequence details -const debugSequences = false - -// print detailed matching information -const debugMatches = false - -// force encoder to use predefined tables. -const forcePreDef = false - -// zstdMinMatch is the minimum zstd match length. -const zstdMinMatch = 3 - -// fcsUnknown is used for unknown frame content size. -const fcsUnknown = math.MaxUint64 - -var ( - // ErrReservedBlockType is returned when a reserved block type is found. - // Typically this indicates wrong or corrupted input. - ErrReservedBlockType = errors.New("invalid input: reserved block type encountered") - - // ErrCompressedSizeTooBig is returned when a block is bigger than allowed. - // Typically this indicates wrong or corrupted input. - ErrCompressedSizeTooBig = errors.New("invalid input: compressed size too big") - - // ErrBlockTooSmall is returned when a block is too small to be decoded. - // Typically returned on invalid input. - ErrBlockTooSmall = errors.New("block too small") - - // ErrUnexpectedBlockSize is returned when a block has unexpected size. - // Typically returned on invalid input. - ErrUnexpectedBlockSize = errors.New("unexpected block size") - - // ErrMagicMismatch is returned when a "magic" number isn't what is expected. - // Typically this indicates wrong or corrupted input. - ErrMagicMismatch = errors.New("invalid input: magic number mismatch") - - // ErrWindowSizeExceeded is returned when a reference exceeds the valid window size. - // Typically this indicates wrong or corrupted input. - ErrWindowSizeExceeded = errors.New("window size exceeded") - - // ErrWindowSizeTooSmall is returned when no window size is specified. - // Typically this indicates wrong or corrupted input. - ErrWindowSizeTooSmall = errors.New("invalid input: window size was too small") - - // ErrDecoderSizeExceeded is returned if decompressed size exceeds the configured limit. - ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit") - - // ErrUnknownDictionary is returned if the dictionary ID is unknown. - ErrUnknownDictionary = errors.New("unknown dictionary") - - // ErrFrameSizeExceeded is returned if the stated frame size is exceeded. - // This is only returned if SingleSegment is specified on the frame. - ErrFrameSizeExceeded = errors.New("frame size exceeded") - - // ErrFrameSizeMismatch is returned if the stated frame size does not match the expected size. - // This is only returned if SingleSegment is specified on the frame. - ErrFrameSizeMismatch = errors.New("frame size does not match size on stream") - - // ErrCRCMismatch is returned if CRC mismatches. - ErrCRCMismatch = errors.New("CRC check failed") - - // ErrDecoderClosed will be returned if the Decoder was used after - // Close has been called. - ErrDecoderClosed = errors.New("decoder used after Close") - - // ErrEncoderClosed will be returned if the Encoder was used after - // Close has been called. - ErrEncoderClosed = errors.New("encoder used after Close") - - // ErrDecoderNilInput is returned when a nil Reader was provided - // and an operation other than Reset/DecodeAll/Close was attempted. - ErrDecoderNilInput = errors.New("nil input provided as reader") -) - -func println(a ...interface{}) { - if debug || debugDecoder || debugEncoder { - log.Println(a...) - } -} - -func printf(format string, a ...interface{}) { - if debug || debugDecoder || debugEncoder { - log.Printf(format, a...) - } -} - -func load3232(b []byte, i int32) uint32 { - return le.Load32(b, i) -} - -func load6432(b []byte, i int32) uint64 { - return le.Load64(b, i) -} - -type byter interface { - Bytes() []byte - Len() int -} - -var _ byter = &bytes.Buffer{} diff --git a/vendor/github.com/prometheus/client_golang/prometheus/collectorfunc.go b/vendor/github.com/prometheus/client_golang/prometheus/collectorfunc.go new file mode 100644 index 000000000..9a71a15db --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/prometheus/collectorfunc.go @@ -0,0 +1,30 @@ +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package prometheus + +// CollectorFunc is a convenient way to implement a Prometheus Collector +// without interface boilerplate. +// This implementation is based on DescribeByCollect method. +// familiarize yourself to it before using. +type CollectorFunc func(chan<- Metric) + +// Collect calls the defined CollectorFunc function with the provided Metrics channel +func (f CollectorFunc) Collect(ch chan<- Metric) { + f(ch) +} + +// Describe sends the descriptor information using DescribeByCollect +func (f CollectorFunc) Describe(ch chan<- *Desc) { + DescribeByCollect(f, ch) +} diff --git a/vendor/github.com/prometheus/client_golang/prometheus/promhttp/http.go b/vendor/github.com/prometheus/client_golang/prometheus/promhttp/http.go index 28eed2672..763d99e36 100644 --- a/vendor/github.com/prometheus/client_golang/prometheus/promhttp/http.go +++ b/vendor/github.com/prometheus/client_golang/prometheus/promhttp/http.go @@ -41,11 +41,11 @@ import ( "sync" "time" - "github.com/klauspost/compress/zstd" "github.com/prometheus/common/expfmt" "github.com/prometheus/client_golang/internal/github.com/golang/gddo/httputil" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp/internal" ) const ( @@ -65,7 +65,13 @@ const ( Zstd Compression = "zstd" ) -var defaultCompressionFormats = []Compression{Identity, Gzip, Zstd} +func defaultCompressionFormats() []Compression { + if internal.NewZstdWriter != nil { + return []Compression{Identity, Gzip, Zstd} + } else { + return []Compression{Identity, Gzip} + } +} var gzipPool = sync.Pool{ New: func() interface{} { @@ -138,7 +144,7 @@ func HandlerForTransactional(reg prometheus.TransactionalGatherer, opts HandlerO // Select compression formats to offer based on default or user choice. var compressions []string if !opts.DisableCompression { - offers := defaultCompressionFormats + offers := defaultCompressionFormats() if len(opts.OfferedCompressions) > 0 { offers = opts.OfferedCompressions } @@ -466,14 +472,12 @@ func negotiateEncodingWriter(r *http.Request, rw io.Writer, compressions []strin switch selected { case "zstd": - // TODO(mrueg): Replace klauspost/compress with stdlib implementation once https://github.com/golang/go/issues/62513 is implemented. - z, err := zstd.NewWriter(rw, zstd.WithEncoderLevel(zstd.SpeedFastest)) - if err != nil { - return nil, "", func() {}, err + if internal.NewZstdWriter == nil { + // The content encoding was not implemented yet. + return nil, "", func() {}, fmt.Errorf("content compression format not recognized: %s. Valid formats are: %s", selected, defaultCompressionFormats()) } - - z.Reset(rw) - return z, selected, func() { _ = z.Close() }, nil + writer, closeWriter, err := internal.NewZstdWriter(rw) + return writer, selected, closeWriter, err case "gzip": gz := gzipPool.Get().(*gzip.Writer) gz.Reset(rw) @@ -483,6 +487,6 @@ func negotiateEncodingWriter(r *http.Request, rw io.Writer, compressions []strin return rw, selected, func() {}, nil default: // The content encoding was not implemented yet. - return nil, "", func() {}, fmt.Errorf("content compression format not recognized: %s. Valid formats are: %s", selected, defaultCompressionFormats) + return nil, "", func() {}, fmt.Errorf("content compression format not recognized: %s. Valid formats are: %s", selected, defaultCompressionFormats()) } } diff --git a/vendor/github.com/prometheus/client_golang/prometheus/promhttp/internal/compression.go b/vendor/github.com/prometheus/client_golang/prometheus/promhttp/internal/compression.go new file mode 100644 index 000000000..c5039590f --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/prometheus/promhttp/internal/compression.go @@ -0,0 +1,21 @@ +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "io" +) + +// NewZstdWriter enables zstd write support if non-nil. +var NewZstdWriter func(rw io.Writer) (_ io.Writer, closeWriter func(), _ error) diff --git a/vendor/modules.txt b/vendor/modules.txt index 1ba6fe441..e3a5321aa 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -627,16 +627,9 @@ github.com/json-iterator/go github.com/k3a/html2text # github.com/klauspost/compress v1.18.0 ## explicit; go 1.22 -github.com/klauspost/compress -github.com/klauspost/compress/fse -github.com/klauspost/compress/huff0 -github.com/klauspost/compress/internal/cpuinfo github.com/klauspost/compress/internal/le github.com/klauspost/compress/internal/race -github.com/klauspost/compress/internal/snapref github.com/klauspost/compress/s2 -github.com/klauspost/compress/zstd -github.com/klauspost/compress/zstd/internal/xxhash # github.com/klauspost/cpuid/v2 v2.2.10 ## explicit; go 1.22 github.com/klauspost/cpuid/v2 @@ -745,13 +738,14 @@ github.com/pquerna/otp github.com/pquerna/otp/hotp github.com/pquerna/otp/internal github.com/pquerna/otp/totp -# github.com/prometheus/client_golang v1.21.1 -## explicit; go 1.21 +# github.com/prometheus/client_golang v1.22.0 +## explicit; go 1.22 github.com/prometheus/client_golang/internal/github.com/golang/gddo/httputil github.com/prometheus/client_golang/internal/github.com/golang/gddo/httputil/header github.com/prometheus/client_golang/prometheus github.com/prometheus/client_golang/prometheus/internal github.com/prometheus/client_golang/prometheus/promhttp +github.com/prometheus/client_golang/prometheus/promhttp/internal # github.com/prometheus/client_model v0.6.1 ## explicit; go 1.19 github.com/prometheus/client_model/go