mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2025-06-05 21:59:39 +02:00
[chore] bump go structr cache version -> v0.6.0 (#2773)
* update go-structr library -> v0.6.0, add necessary wrapping types + code changes to support these changes * update readme with go-structr package changes * improved wrapping of the SliceCache type * add code comments for the cache wrapper types * remove test.out 😇 --------- Co-authored-by: tobi <31960611+tsmethurst@users.noreply.github.com>
This commit is contained in:
6
vendor/github.com/zeebo/xxh3/.gitignore
generated
vendored
6
vendor/github.com/zeebo/xxh3/.gitignore
generated
vendored
@ -1,6 +0,0 @@
|
||||
upstream
|
||||
*.pprof
|
||||
xxh3.test
|
||||
.vscode
|
||||
*.txt
|
||||
_compat
|
25
vendor/github.com/zeebo/xxh3/LICENSE
generated
vendored
25
vendor/github.com/zeebo/xxh3/LICENSE
generated
vendored
@ -1,25 +0,0 @@
|
||||
xxHash Library
|
||||
Copyright (c) 2012-2014, Yann Collet
|
||||
Copyright (c) 2019, Jeff Wendling
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
27
vendor/github.com/zeebo/xxh3/Makefile
generated
vendored
27
vendor/github.com/zeebo/xxh3/Makefile
generated
vendored
@ -1,27 +0,0 @@
|
||||
.PHONY: all vet
|
||||
all: genasm _compat
|
||||
|
||||
genasm: avo/avx.go avo/sse.go
|
||||
cd ./avo; go generate gen.go
|
||||
|
||||
clean:
|
||||
rm accum_vector_avx_amd64.s
|
||||
rm accum_vector_sse_amd64.s
|
||||
rm _compat
|
||||
|
||||
upstream/xxhash.o: upstream/xxhash.h
|
||||
( cd upstream && make )
|
||||
|
||||
_compat: _compat.c upstream/xxhash.o
|
||||
gcc -o _compat _compat.c ./upstream/xxhash.o
|
||||
|
||||
vet:
|
||||
GOOS=linux GOARCH=386 GO386=softfloat go vet ./...
|
||||
GOOS=windows GOARCH=386 GO386=softfloat go vet ./...
|
||||
GOOS=linux GOARCH=amd64 go vet ./...
|
||||
GOOS=windows GOARCH=amd64 go vet ./...
|
||||
GOOS=darwin GOARCH=amd64 go vet ./...
|
||||
GOOS=linux GOARCH=arm go vet ./...
|
||||
GOOS=linux GOARCH=arm64 go vet ./...
|
||||
GOOS=windows GOARCH=arm64 go vet ./...
|
||||
GOOS=darwin GOARCH=arm64 go vet ./...
|
38
vendor/github.com/zeebo/xxh3/README.md
generated
vendored
38
vendor/github.com/zeebo/xxh3/README.md
generated
vendored
@ -1,38 +0,0 @@
|
||||
# XXH3
|
||||
[](https://godoc.org/github.com/zeebo/xxh3)
|
||||
[](https://sourcegraph.com/github.com/zeebo/xxh3?badge)
|
||||
[](https://goreportcard.com/report/github.com/zeebo/xxh3)
|
||||
|
||||
This package is a port of the [xxh3](https://github.com/Cyan4973/xxHash) library to Go.
|
||||
|
||||
Upstream has fixed the output as of v0.8.0, and this package matches that.
|
||||
|
||||
---
|
||||
|
||||
# Benchmarks
|
||||
|
||||
Run on my `i7-8850H CPU @ 2.60GHz`
|
||||
|
||||
## Small Sizes
|
||||
|
||||
| Bytes | Rate |
|
||||
|-----------|--------------------------------------|
|
||||
|` 0 ` |` 0.74 ns/op ` |
|
||||
|` 1-3 ` |` 4.19 ns/op (0.24 GB/s - 0.71 GB/s) `|
|
||||
|` 4-8 ` |` 4.16 ns/op (0.97 GB/s - 1.98 GB/s) `|
|
||||
|` 9-16 ` |` 4.46 ns/op (2.02 GB/s - 3.58 GB/s) `|
|
||||
|` 17-32 ` |` 6.22 ns/op (2.76 GB/s - 5.15 GB/s) `|
|
||||
|` 33-64 ` |` 8.00 ns/op (4.13 GB/s - 8.13 GB/s) `|
|
||||
|` 65-96 ` |` 11.0 ns/op (5.91 GB/s - 8.84 GB/s) `|
|
||||
|` 97-128 ` |` 12.8 ns/op (7.68 GB/s - 10.0 GB/s) `|
|
||||
|
||||
## Large Sizes
|
||||
|
||||
| Bytes | Rate | SSE2 Rate | AVX2 Rate |
|
||||
|---------|--------------------------|--------------------------|--------------------------|
|
||||
|` 129 ` |` 13.6 ns/op (9.45 GB/s) `| | |
|
||||
|` 240 ` |` 23.8 ns/op (10.1 GB/s) `| | |
|
||||
|` 241 ` |` 40.5 ns/op (5.97 GB/s) `|` 23.3 ns/op (10.4 GB/s) `|` 20.1 ns/op (12.0 GB/s) `|
|
||||
|` 512 ` |` 69.8 ns/op (7.34 GB/s) `|` 30.4 ns/op (16.9 GB/s) `|` 24.7 ns/op (20.7 GB/s) `|
|
||||
|` 1024 ` |` 132 ns/op (7.77 GB/s) `|` 48.9 ns/op (20.9 GB/s) `|` 37.7 ns/op (27.2 GB/s) `|
|
||||
|` 100KB `|` 13.0 us/op (7.88 GB/s) `|` 4.05 us/op (25.3 GB/s) `|` 2.31 us/op (44.3 GB/s) `|
|
39
vendor/github.com/zeebo/xxh3/_compat.c
generated
vendored
39
vendor/github.com/zeebo/xxh3/_compat.c
generated
vendored
@ -1,39 +0,0 @@
|
||||
#include "upstream/xxhash.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
unsigned char buf[4096];
|
||||
for (int i = 0; i < 4096; i++) {
|
||||
buf[i] = (unsigned char)((i+1)%251);
|
||||
}
|
||||
|
||||
printf("var testVecs64 = []uint64{\n");
|
||||
for (int i = 0; i < 4096; i++) {
|
||||
if (i % 4 == 0) {
|
||||
printf("\t");
|
||||
}
|
||||
|
||||
uint64_t h = XXH3_64bits(buf, (size_t)i);
|
||||
printf("0x%lx, ", h);
|
||||
|
||||
if (i % 4 == 3) {
|
||||
printf("\n\t");
|
||||
}
|
||||
}
|
||||
printf("}\n\n");
|
||||
|
||||
printf("var testVecs128 = [][2]uint64{\n");
|
||||
for (int i = 0; i < 4096; i++) {
|
||||
if (i % 4 == 0) {
|
||||
printf("\t");
|
||||
}
|
||||
|
||||
XXH128_hash_t h = XXH3_128bits(buf, (size_t)i);
|
||||
printf("{0x%lx, 0x%lx}, ", h.high64, h.low64);
|
||||
|
||||
if (i % 4 == 3) {
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
printf("}\n\n");
|
||||
}
|
542
vendor/github.com/zeebo/xxh3/accum_generic.go
generated
vendored
542
vendor/github.com/zeebo/xxh3/accum_generic.go
generated
vendored
@ -1,542 +0,0 @@
|
||||
package xxh3
|
||||
|
||||
// avx512Switch is the size at which the avx512 code is used.
|
||||
// Bigger blocks benefit more.
|
||||
const avx512Switch = 1 << 10
|
||||
|
||||
func accumScalar(accs *[8]u64, p, secret ptr, l u64) {
|
||||
if secret != key {
|
||||
accumScalarSeed(accs, p, secret, l)
|
||||
return
|
||||
}
|
||||
for l > _block {
|
||||
k := secret
|
||||
|
||||
// accs
|
||||
for i := 0; i < 16; i++ {
|
||||
dv0 := readU64(p, 8*0)
|
||||
dk0 := dv0 ^ readU64(k, 8*0)
|
||||
accs[1] += dv0
|
||||
accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)
|
||||
|
||||
dv1 := readU64(p, 8*1)
|
||||
dk1 := dv1 ^ readU64(k, 8*1)
|
||||
accs[0] += dv1
|
||||
accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)
|
||||
|
||||
dv2 := readU64(p, 8*2)
|
||||
dk2 := dv2 ^ readU64(k, 8*2)
|
||||
accs[3] += dv2
|
||||
accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)
|
||||
|
||||
dv3 := readU64(p, 8*3)
|
||||
dk3 := dv3 ^ readU64(k, 8*3)
|
||||
accs[2] += dv3
|
||||
accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)
|
||||
|
||||
dv4 := readU64(p, 8*4)
|
||||
dk4 := dv4 ^ readU64(k, 8*4)
|
||||
accs[5] += dv4
|
||||
accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)
|
||||
|
||||
dv5 := readU64(p, 8*5)
|
||||
dk5 := dv5 ^ readU64(k, 8*5)
|
||||
accs[4] += dv5
|
||||
accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)
|
||||
|
||||
dv6 := readU64(p, 8*6)
|
||||
dk6 := dv6 ^ readU64(k, 8*6)
|
||||
accs[7] += dv6
|
||||
accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)
|
||||
|
||||
dv7 := readU64(p, 8*7)
|
||||
dk7 := dv7 ^ readU64(k, 8*7)
|
||||
accs[6] += dv7
|
||||
accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)
|
||||
|
||||
l -= _stripe
|
||||
if l > 0 {
|
||||
p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8)
|
||||
}
|
||||
}
|
||||
|
||||
// scramble accs
|
||||
accs[0] ^= accs[0] >> 47
|
||||
accs[0] ^= key64_128
|
||||
accs[0] *= prime32_1
|
||||
|
||||
accs[1] ^= accs[1] >> 47
|
||||
accs[1] ^= key64_136
|
||||
accs[1] *= prime32_1
|
||||
|
||||
accs[2] ^= accs[2] >> 47
|
||||
accs[2] ^= key64_144
|
||||
accs[2] *= prime32_1
|
||||
|
||||
accs[3] ^= accs[3] >> 47
|
||||
accs[3] ^= key64_152
|
||||
accs[3] *= prime32_1
|
||||
|
||||
accs[4] ^= accs[4] >> 47
|
||||
accs[4] ^= key64_160
|
||||
accs[4] *= prime32_1
|
||||
|
||||
accs[5] ^= accs[5] >> 47
|
||||
accs[5] ^= key64_168
|
||||
accs[5] *= prime32_1
|
||||
|
||||
accs[6] ^= accs[6] >> 47
|
||||
accs[6] ^= key64_176
|
||||
accs[6] *= prime32_1
|
||||
|
||||
accs[7] ^= accs[7] >> 47
|
||||
accs[7] ^= key64_184
|
||||
accs[7] *= prime32_1
|
||||
}
|
||||
|
||||
if l > 0 {
|
||||
t, k := (l-1)/_stripe, secret
|
||||
|
||||
for i := u64(0); i < t; i++ {
|
||||
dv0 := readU64(p, 8*0)
|
||||
dk0 := dv0 ^ readU64(k, 8*0)
|
||||
accs[1] += dv0
|
||||
accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)
|
||||
|
||||
dv1 := readU64(p, 8*1)
|
||||
dk1 := dv1 ^ readU64(k, 8*1)
|
||||
accs[0] += dv1
|
||||
accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)
|
||||
|
||||
dv2 := readU64(p, 8*2)
|
||||
dk2 := dv2 ^ readU64(k, 8*2)
|
||||
accs[3] += dv2
|
||||
accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)
|
||||
|
||||
dv3 := readU64(p, 8*3)
|
||||
dk3 := dv3 ^ readU64(k, 8*3)
|
||||
accs[2] += dv3
|
||||
accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)
|
||||
|
||||
dv4 := readU64(p, 8*4)
|
||||
dk4 := dv4 ^ readU64(k, 8*4)
|
||||
accs[5] += dv4
|
||||
accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)
|
||||
|
||||
dv5 := readU64(p, 8*5)
|
||||
dk5 := dv5 ^ readU64(k, 8*5)
|
||||
accs[4] += dv5
|
||||
accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)
|
||||
|
||||
dv6 := readU64(p, 8*6)
|
||||
dk6 := dv6 ^ readU64(k, 8*6)
|
||||
accs[7] += dv6
|
||||
accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)
|
||||
|
||||
dv7 := readU64(p, 8*7)
|
||||
dk7 := dv7 ^ readU64(k, 8*7)
|
||||
accs[6] += dv7
|
||||
accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)
|
||||
|
||||
l -= _stripe
|
||||
if l > 0 {
|
||||
p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8)
|
||||
}
|
||||
}
|
||||
|
||||
if l > 0 {
|
||||
p = ptr(ui(p) - uintptr(_stripe-l))
|
||||
|
||||
dv0 := readU64(p, 8*0)
|
||||
dk0 := dv0 ^ key64_121
|
||||
accs[1] += dv0
|
||||
accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)
|
||||
|
||||
dv1 := readU64(p, 8*1)
|
||||
dk1 := dv1 ^ key64_129
|
||||
accs[0] += dv1
|
||||
accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)
|
||||
|
||||
dv2 := readU64(p, 8*2)
|
||||
dk2 := dv2 ^ key64_137
|
||||
accs[3] += dv2
|
||||
accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)
|
||||
|
||||
dv3 := readU64(p, 8*3)
|
||||
dk3 := dv3 ^ key64_145
|
||||
accs[2] += dv3
|
||||
accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)
|
||||
|
||||
dv4 := readU64(p, 8*4)
|
||||
dk4 := dv4 ^ key64_153
|
||||
accs[5] += dv4
|
||||
accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)
|
||||
|
||||
dv5 := readU64(p, 8*5)
|
||||
dk5 := dv5 ^ key64_161
|
||||
accs[4] += dv5
|
||||
accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)
|
||||
|
||||
dv6 := readU64(p, 8*6)
|
||||
dk6 := dv6 ^ key64_169
|
||||
accs[7] += dv6
|
||||
accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)
|
||||
|
||||
dv7 := readU64(p, 8*7)
|
||||
dk7 := dv7 ^ key64_177
|
||||
accs[6] += dv7
|
||||
accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func accumBlockScalar(accs *[8]u64, p, secret ptr) {
|
||||
if secret != key {
|
||||
accumBlockScalarSeed(accs, p, secret)
|
||||
return
|
||||
}
|
||||
// accs
|
||||
for i := 0; i < 16; i++ {
|
||||
dv0 := readU64(p, 8*0)
|
||||
dk0 := dv0 ^ readU64(secret, 8*0)
|
||||
accs[1] += dv0
|
||||
accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)
|
||||
|
||||
dv1 := readU64(p, 8*1)
|
||||
dk1 := dv1 ^ readU64(secret, 8*1)
|
||||
accs[0] += dv1
|
||||
accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)
|
||||
|
||||
dv2 := readU64(p, 8*2)
|
||||
dk2 := dv2 ^ readU64(secret, 8*2)
|
||||
accs[3] += dv2
|
||||
accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)
|
||||
|
||||
dv3 := readU64(p, 8*3)
|
||||
dk3 := dv3 ^ readU64(secret, 8*3)
|
||||
accs[2] += dv3
|
||||
accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)
|
||||
|
||||
dv4 := readU64(p, 8*4)
|
||||
dk4 := dv4 ^ readU64(secret, 8*4)
|
||||
accs[5] += dv4
|
||||
accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)
|
||||
|
||||
dv5 := readU64(p, 8*5)
|
||||
dk5 := dv5 ^ readU64(secret, 8*5)
|
||||
accs[4] += dv5
|
||||
accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)
|
||||
|
||||
dv6 := readU64(p, 8*6)
|
||||
dk6 := dv6 ^ readU64(secret, 8*6)
|
||||
accs[7] += dv6
|
||||
accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)
|
||||
|
||||
dv7 := readU64(p, 8*7)
|
||||
dk7 := dv7 ^ readU64(secret, 8*7)
|
||||
accs[6] += dv7
|
||||
accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)
|
||||
|
||||
p, secret = ptr(ui(p)+_stripe), ptr(ui(secret)+8)
|
||||
}
|
||||
|
||||
// scramble accs
|
||||
accs[0] ^= accs[0] >> 47
|
||||
accs[0] ^= key64_128
|
||||
accs[0] *= prime32_1
|
||||
|
||||
accs[1] ^= accs[1] >> 47
|
||||
accs[1] ^= key64_136
|
||||
accs[1] *= prime32_1
|
||||
|
||||
accs[2] ^= accs[2] >> 47
|
||||
accs[2] ^= key64_144
|
||||
accs[2] *= prime32_1
|
||||
|
||||
accs[3] ^= accs[3] >> 47
|
||||
accs[3] ^= key64_152
|
||||
accs[3] *= prime32_1
|
||||
|
||||
accs[4] ^= accs[4] >> 47
|
||||
accs[4] ^= key64_160
|
||||
accs[4] *= prime32_1
|
||||
|
||||
accs[5] ^= accs[5] >> 47
|
||||
accs[5] ^= key64_168
|
||||
accs[5] *= prime32_1
|
||||
|
||||
accs[6] ^= accs[6] >> 47
|
||||
accs[6] ^= key64_176
|
||||
accs[6] *= prime32_1
|
||||
|
||||
accs[7] ^= accs[7] >> 47
|
||||
accs[7] ^= key64_184
|
||||
accs[7] *= prime32_1
|
||||
}
|
||||
|
||||
// accumScalarSeed should be used with custom key.
|
||||
func accumScalarSeed(accs *[8]u64, p, secret ptr, l u64) {
|
||||
for l > _block {
|
||||
k := secret
|
||||
|
||||
// accs
|
||||
for i := 0; i < 16; i++ {
|
||||
dv0 := readU64(p, 8*0)
|
||||
dk0 := dv0 ^ readU64(k, 8*0)
|
||||
accs[1] += dv0
|
||||
accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)
|
||||
|
||||
dv1 := readU64(p, 8*1)
|
||||
dk1 := dv1 ^ readU64(k, 8*1)
|
||||
accs[0] += dv1
|
||||
accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)
|
||||
|
||||
dv2 := readU64(p, 8*2)
|
||||
dk2 := dv2 ^ readU64(k, 8*2)
|
||||
accs[3] += dv2
|
||||
accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)
|
||||
|
||||
dv3 := readU64(p, 8*3)
|
||||
dk3 := dv3 ^ readU64(k, 8*3)
|
||||
accs[2] += dv3
|
||||
accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)
|
||||
|
||||
dv4 := readU64(p, 8*4)
|
||||
dk4 := dv4 ^ readU64(k, 8*4)
|
||||
accs[5] += dv4
|
||||
accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)
|
||||
|
||||
dv5 := readU64(p, 8*5)
|
||||
dk5 := dv5 ^ readU64(k, 8*5)
|
||||
accs[4] += dv5
|
||||
accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)
|
||||
|
||||
dv6 := readU64(p, 8*6)
|
||||
dk6 := dv6 ^ readU64(k, 8*6)
|
||||
accs[7] += dv6
|
||||
accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)
|
||||
|
||||
dv7 := readU64(p, 8*7)
|
||||
dk7 := dv7 ^ readU64(k, 8*7)
|
||||
accs[6] += dv7
|
||||
accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)
|
||||
|
||||
l -= _stripe
|
||||
if l > 0 {
|
||||
p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8)
|
||||
}
|
||||
}
|
||||
|
||||
// scramble accs
|
||||
accs[0] ^= accs[0] >> 47
|
||||
accs[0] ^= readU64(secret, 128)
|
||||
accs[0] *= prime32_1
|
||||
|
||||
accs[1] ^= accs[1] >> 47
|
||||
accs[1] ^= readU64(secret, 136)
|
||||
accs[1] *= prime32_1
|
||||
|
||||
accs[2] ^= accs[2] >> 47
|
||||
accs[2] ^= readU64(secret, 144)
|
||||
accs[2] *= prime32_1
|
||||
|
||||
accs[3] ^= accs[3] >> 47
|
||||
accs[3] ^= readU64(secret, 152)
|
||||
accs[3] *= prime32_1
|
||||
|
||||
accs[4] ^= accs[4] >> 47
|
||||
accs[4] ^= readU64(secret, 160)
|
||||
accs[4] *= prime32_1
|
||||
|
||||
accs[5] ^= accs[5] >> 47
|
||||
accs[5] ^= readU64(secret, 168)
|
||||
accs[5] *= prime32_1
|
||||
|
||||
accs[6] ^= accs[6] >> 47
|
||||
accs[6] ^= readU64(secret, 176)
|
||||
accs[6] *= prime32_1
|
||||
|
||||
accs[7] ^= accs[7] >> 47
|
||||
accs[7] ^= readU64(secret, 184)
|
||||
accs[7] *= prime32_1
|
||||
}
|
||||
|
||||
if l > 0 {
|
||||
t, k := (l-1)/_stripe, secret
|
||||
|
||||
for i := u64(0); i < t; i++ {
|
||||
dv0 := readU64(p, 8*0)
|
||||
dk0 := dv0 ^ readU64(k, 8*0)
|
||||
accs[1] += dv0
|
||||
accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)
|
||||
|
||||
dv1 := readU64(p, 8*1)
|
||||
dk1 := dv1 ^ readU64(k, 8*1)
|
||||
accs[0] += dv1
|
||||
accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)
|
||||
|
||||
dv2 := readU64(p, 8*2)
|
||||
dk2 := dv2 ^ readU64(k, 8*2)
|
||||
accs[3] += dv2
|
||||
accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)
|
||||
|
||||
dv3 := readU64(p, 8*3)
|
||||
dk3 := dv3 ^ readU64(k, 8*3)
|
||||
accs[2] += dv3
|
||||
accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)
|
||||
|
||||
dv4 := readU64(p, 8*4)
|
||||
dk4 := dv4 ^ readU64(k, 8*4)
|
||||
accs[5] += dv4
|
||||
accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)
|
||||
|
||||
dv5 := readU64(p, 8*5)
|
||||
dk5 := dv5 ^ readU64(k, 8*5)
|
||||
accs[4] += dv5
|
||||
accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)
|
||||
|
||||
dv6 := readU64(p, 8*6)
|
||||
dk6 := dv6 ^ readU64(k, 8*6)
|
||||
accs[7] += dv6
|
||||
accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)
|
||||
|
||||
dv7 := readU64(p, 8*7)
|
||||
dk7 := dv7 ^ readU64(k, 8*7)
|
||||
accs[6] += dv7
|
||||
accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)
|
||||
|
||||
l -= _stripe
|
||||
if l > 0 {
|
||||
p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8)
|
||||
}
|
||||
}
|
||||
|
||||
if l > 0 {
|
||||
p = ptr(ui(p) - uintptr(_stripe-l))
|
||||
|
||||
dv0 := readU64(p, 8*0)
|
||||
dk0 := dv0 ^ readU64(secret, 121)
|
||||
accs[1] += dv0
|
||||
accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)
|
||||
|
||||
dv1 := readU64(p, 8*1)
|
||||
dk1 := dv1 ^ readU64(secret, 129)
|
||||
accs[0] += dv1
|
||||
accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)
|
||||
|
||||
dv2 := readU64(p, 8*2)
|
||||
dk2 := dv2 ^ readU64(secret, 137)
|
||||
accs[3] += dv2
|
||||
accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)
|
||||
|
||||
dv3 := readU64(p, 8*3)
|
||||
dk3 := dv3 ^ readU64(secret, 145)
|
||||
accs[2] += dv3
|
||||
accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)
|
||||
|
||||
dv4 := readU64(p, 8*4)
|
||||
dk4 := dv4 ^ readU64(secret, 153)
|
||||
accs[5] += dv4
|
||||
accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)
|
||||
|
||||
dv5 := readU64(p, 8*5)
|
||||
dk5 := dv5 ^ readU64(secret, 161)
|
||||
accs[4] += dv5
|
||||
accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)
|
||||
|
||||
dv6 := readU64(p, 8*6)
|
||||
dk6 := dv6 ^ readU64(secret, 169)
|
||||
accs[7] += dv6
|
||||
accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)
|
||||
|
||||
dv7 := readU64(p, 8*7)
|
||||
dk7 := dv7 ^ readU64(secret, 177)
|
||||
accs[6] += dv7
|
||||
accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// accumBlockScalarSeed should be used with custom key.
|
||||
func accumBlockScalarSeed(accs *[8]u64, p, secret ptr) {
|
||||
// accs
|
||||
{
|
||||
secret := secret
|
||||
for i := 0; i < 16; i++ {
|
||||
dv0 := readU64(p, 8*0)
|
||||
dk0 := dv0 ^ readU64(secret, 8*0)
|
||||
accs[1] += dv0
|
||||
accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32)
|
||||
|
||||
dv1 := readU64(p, 8*1)
|
||||
dk1 := dv1 ^ readU64(secret, 8*1)
|
||||
accs[0] += dv1
|
||||
accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32)
|
||||
|
||||
dv2 := readU64(p, 8*2)
|
||||
dk2 := dv2 ^ readU64(secret, 8*2)
|
||||
accs[3] += dv2
|
||||
accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32)
|
||||
|
||||
dv3 := readU64(p, 8*3)
|
||||
dk3 := dv3 ^ readU64(secret, 8*3)
|
||||
accs[2] += dv3
|
||||
accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32)
|
||||
|
||||
dv4 := readU64(p, 8*4)
|
||||
dk4 := dv4 ^ readU64(secret, 8*4)
|
||||
accs[5] += dv4
|
||||
accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32)
|
||||
|
||||
dv5 := readU64(p, 8*5)
|
||||
dk5 := dv5 ^ readU64(secret, 8*5)
|
||||
accs[4] += dv5
|
||||
accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32)
|
||||
|
||||
dv6 := readU64(p, 8*6)
|
||||
dk6 := dv6 ^ readU64(secret, 8*6)
|
||||
accs[7] += dv6
|
||||
accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32)
|
||||
|
||||
dv7 := readU64(p, 8*7)
|
||||
dk7 := dv7 ^ readU64(secret, 8*7)
|
||||
accs[6] += dv7
|
||||
accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32)
|
||||
|
||||
p, secret = ptr(ui(p)+_stripe), ptr(ui(secret)+8)
|
||||
}
|
||||
}
|
||||
|
||||
// scramble accs
|
||||
accs[0] ^= accs[0] >> 47
|
||||
accs[0] ^= readU64(secret, 128)
|
||||
accs[0] *= prime32_1
|
||||
|
||||
accs[1] ^= accs[1] >> 47
|
||||
accs[1] ^= readU64(secret, 136)
|
||||
accs[1] *= prime32_1
|
||||
|
||||
accs[2] ^= accs[2] >> 47
|
||||
accs[2] ^= readU64(secret, 144)
|
||||
accs[2] *= prime32_1
|
||||
|
||||
accs[3] ^= accs[3] >> 47
|
||||
accs[3] ^= readU64(secret, 152)
|
||||
accs[3] *= prime32_1
|
||||
|
||||
accs[4] ^= accs[4] >> 47
|
||||
accs[4] ^= readU64(secret, 160)
|
||||
accs[4] *= prime32_1
|
||||
|
||||
accs[5] ^= accs[5] >> 47
|
||||
accs[5] ^= readU64(secret, 168)
|
||||
accs[5] *= prime32_1
|
||||
|
||||
accs[6] ^= accs[6] >> 47
|
||||
accs[6] ^= readU64(secret, 176)
|
||||
accs[6] *= prime32_1
|
||||
|
||||
accs[7] ^= accs[7] >> 47
|
||||
accs[7] ^= readU64(secret, 184)
|
||||
accs[7] *= prime32_1
|
||||
}
|
40
vendor/github.com/zeebo/xxh3/accum_stubs_amd64.go
generated
vendored
40
vendor/github.com/zeebo/xxh3/accum_stubs_amd64.go
generated
vendored
@ -1,40 +0,0 @@
|
||||
package xxh3
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
|
||||
"github.com/klauspost/cpuid/v2"
|
||||
)
|
||||
|
||||
var (
|
||||
hasAVX2 = cpuid.CPU.Has(cpuid.AVX2)
|
||||
hasSSE2 = cpuid.CPU.Has(cpuid.SSE2) // Always true on amd64
|
||||
hasAVX512 = cpuid.CPU.Has(cpuid.AVX512F)
|
||||
)
|
||||
|
||||
//go:noescape
|
||||
func accumAVX2(acc *[8]u64, data, key unsafe.Pointer, len u64)
|
||||
|
||||
//go:noescape
|
||||
func accumAVX512(acc *[8]u64, data, key unsafe.Pointer, len u64)
|
||||
|
||||
//go:noescape
|
||||
func accumSSE(acc *[8]u64, data, key unsafe.Pointer, len u64)
|
||||
|
||||
//go:noescape
|
||||
func accumBlockAVX2(acc *[8]u64, data, key unsafe.Pointer)
|
||||
|
||||
//go:noescape
|
||||
func accumBlockSSE(acc *[8]u64, data, key unsafe.Pointer)
|
||||
|
||||
func withOverrides(avx512, avx2, sse2 bool, cb func()) {
|
||||
avx512Orig, avx2Orig, sse2Orig := hasAVX512, hasAVX2, hasSSE2
|
||||
hasAVX512, hasAVX2, hasSSE2 = avx512, avx2, sse2
|
||||
defer func() { hasAVX512, hasAVX2, hasSSE2 = avx512Orig, avx2Orig, sse2Orig }()
|
||||
cb()
|
||||
}
|
||||
|
||||
func withAVX512(cb func()) { withOverrides(hasAVX512, false, false, cb) }
|
||||
func withAVX2(cb func()) { withOverrides(false, hasAVX2, false, cb) }
|
||||
func withSSE2(cb func()) { withOverrides(false, false, hasSSE2, cb) }
|
||||
func withGeneric(cb func()) { withOverrides(false, false, false, cb) }
|
25
vendor/github.com/zeebo/xxh3/accum_stubs_other.go
generated
vendored
25
vendor/github.com/zeebo/xxh3/accum_stubs_other.go
generated
vendored
@ -1,25 +0,0 @@
|
||||
//go:build !amd64
|
||||
// +build !amd64
|
||||
|
||||
package xxh3
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const (
|
||||
hasAVX2 = false
|
||||
hasSSE2 = false
|
||||
hasAVX512 = false
|
||||
)
|
||||
|
||||
func accumAVX2(acc *[8]u64, data, key unsafe.Pointer, len u64) { panic("unreachable") }
|
||||
func accumSSE(acc *[8]u64, data, key unsafe.Pointer, len u64) { panic("unreachable") }
|
||||
func accumBlockAVX2(acc *[8]u64, data, key unsafe.Pointer) { panic("unreachable") }
|
||||
func accumBlockSSE(acc *[8]u64, data, key unsafe.Pointer) { panic("unreachable") }
|
||||
func accumAVX512(acc *[8]u64, data, key unsafe.Pointer, len u64) { panic("unreachable") }
|
||||
|
||||
func withAVX512(cb func()) { cb() }
|
||||
func withAVX2(cb func()) { cb() }
|
||||
func withSSE2(cb func()) { cb() }
|
||||
func withGeneric(cb func()) { cb() }
|
379
vendor/github.com/zeebo/xxh3/accum_vector_avx512_amd64.s
generated
vendored
379
vendor/github.com/zeebo/xxh3/accum_vector_avx512_amd64.s
generated
vendored
@ -1,379 +0,0 @@
|
||||
// Code generated by command: go run gen.go -avx512 -out ../accum_vector_avx512_amd64.s -pkg xxh3. DO NOT EDIT.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA prime_avx512<>+0(SB)/8, $0x000000009e3779b1
|
||||
DATA prime_avx512<>+8(SB)/8, $0x000000009e3779b1
|
||||
DATA prime_avx512<>+16(SB)/8, $0x000000009e3779b1
|
||||
DATA prime_avx512<>+24(SB)/8, $0x000000009e3779b1
|
||||
DATA prime_avx512<>+32(SB)/8, $0x000000009e3779b1
|
||||
DATA prime_avx512<>+40(SB)/8, $0x000000009e3779b1
|
||||
DATA prime_avx512<>+48(SB)/8, $0x000000009e3779b1
|
||||
DATA prime_avx512<>+56(SB)/8, $0x000000009e3779b1
|
||||
GLOBL prime_avx512<>(SB), RODATA|NOPTR, $64
|
||||
|
||||
// func accumAVX512(acc *[8]uint64, data *byte, key *byte, len uint64)
|
||||
// Requires: AVX, AVX512F, MMX+
|
||||
TEXT ·accumAVX512(SB), NOSPLIT, $0-32
|
||||
MOVQ acc+0(FP), AX
|
||||
MOVQ data+8(FP), CX
|
||||
MOVQ key+16(FP), DX
|
||||
MOVQ len+24(FP), BX
|
||||
VMOVDQU64 (AX), Z1
|
||||
VMOVDQU64 prime_avx512<>+0(SB), Z0
|
||||
VMOVDQU64 (DX), Z2
|
||||
VMOVDQU64 8(DX), Z3
|
||||
VMOVDQU64 16(DX), Z4
|
||||
VMOVDQU64 24(DX), Z5
|
||||
VMOVDQU64 32(DX), Z6
|
||||
VMOVDQU64 40(DX), Z7
|
||||
VMOVDQU64 48(DX), Z8
|
||||
VMOVDQU64 56(DX), Z9
|
||||
VMOVDQU64 64(DX), Z10
|
||||
VMOVDQU64 72(DX), Z11
|
||||
VMOVDQU64 80(DX), Z12
|
||||
VMOVDQU64 88(DX), Z13
|
||||
VMOVDQU64 96(DX), Z14
|
||||
VMOVDQU64 104(DX), Z15
|
||||
VMOVDQU64 112(DX), Z16
|
||||
VMOVDQU64 120(DX), Z17
|
||||
VMOVDQU64 128(DX), Z18
|
||||
VMOVDQU64 121(DX), Z19
|
||||
|
||||
accum_large:
|
||||
CMPQ BX, $0x00000400
|
||||
JLE accum
|
||||
VMOVDQU64 (CX), Z20
|
||||
PREFETCHT0 1024(CX)
|
||||
VPXORD Z2, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 64(CX), Z20
|
||||
PREFETCHT0 1088(CX)
|
||||
VPXORD Z3, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 128(CX), Z20
|
||||
PREFETCHT0 1152(CX)
|
||||
VPXORD Z4, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 192(CX), Z20
|
||||
PREFETCHT0 1216(CX)
|
||||
VPXORD Z5, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 256(CX), Z20
|
||||
PREFETCHT0 1280(CX)
|
||||
VPXORD Z6, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 320(CX), Z20
|
||||
PREFETCHT0 1344(CX)
|
||||
VPXORD Z7, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 384(CX), Z20
|
||||
PREFETCHT0 1408(CX)
|
||||
VPXORD Z8, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 448(CX), Z20
|
||||
PREFETCHT0 1472(CX)
|
||||
VPXORD Z9, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 512(CX), Z20
|
||||
PREFETCHT0 1536(CX)
|
||||
VPXORD Z10, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 576(CX), Z20
|
||||
PREFETCHT0 1600(CX)
|
||||
VPXORD Z11, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 640(CX), Z20
|
||||
PREFETCHT0 1664(CX)
|
||||
VPXORD Z12, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 704(CX), Z20
|
||||
PREFETCHT0 1728(CX)
|
||||
VPXORD Z13, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 768(CX), Z20
|
||||
PREFETCHT0 1792(CX)
|
||||
VPXORD Z14, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 832(CX), Z20
|
||||
PREFETCHT0 1856(CX)
|
||||
VPXORD Z15, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 896(CX), Z20
|
||||
PREFETCHT0 1920(CX)
|
||||
VPXORD Z16, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
VMOVDQU64 960(CX), Z20
|
||||
PREFETCHT0 1984(CX)
|
||||
VPXORD Z17, Z20, Z21
|
||||
VPSHUFD $0x31, Z21, Z22
|
||||
VPMULUDQ Z21, Z22, Z21
|
||||
VPSHUFD $0x4e, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
VPADDQ Z1, Z21, Z1
|
||||
ADDQ $0x00000400, CX
|
||||
SUBQ $0x00000400, BX
|
||||
VPSRLQ $0x2f, Z1, Z20
|
||||
VPTERNLOGD $0x96, Z1, Z18, Z20
|
||||
VPMULUDQ Z0, Z20, Z1
|
||||
VPSHUFD $0xf5, Z20, Z20
|
||||
VPMULUDQ Z0, Z20, Z20
|
||||
VPSLLQ $0x20, Z20, Z20
|
||||
VPADDQ Z1, Z20, Z1
|
||||
JMP accum_large
|
||||
|
||||
accum:
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z2, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z18
|
||||
VPMULUDQ Z2, Z18, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z3, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z4, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z5, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z6, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z7, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z8, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z9, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z10, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z11, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z12, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z13, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z14, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z15, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z16, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
CMPQ BX, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z17, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, BX
|
||||
|
||||
finalize:
|
||||
CMPQ BX, $0x00
|
||||
JE return
|
||||
SUBQ $0x40, CX
|
||||
ADDQ BX, CX
|
||||
VMOVDQU64 (CX), Z0
|
||||
VPXORD Z19, Z0, Z2
|
||||
VPSHUFD $0x31, Z2, Z3
|
||||
VPMULUDQ Z2, Z3, Z2
|
||||
VPSHUFD $0x4e, Z0, Z0
|
||||
VPADDQ Z1, Z0, Z1
|
||||
VPADDQ Z1, Z2, Z1
|
||||
|
||||
return:
|
||||
VMOVDQU64 Z1, (AX)
|
||||
VZEROUPPER
|
||||
RET
|
586
vendor/github.com/zeebo/xxh3/accum_vector_avx_amd64.s
generated
vendored
586
vendor/github.com/zeebo/xxh3/accum_vector_avx_amd64.s
generated
vendored
@ -1,586 +0,0 @@
|
||||
// Code generated by command: go run gen.go -avx -out ../accum_vector_avx_amd64.s -pkg xxh3. DO NOT EDIT.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA prime_avx<>+0(SB)/8, $0x000000009e3779b1
|
||||
DATA prime_avx<>+8(SB)/8, $0x000000009e3779b1
|
||||
DATA prime_avx<>+16(SB)/8, $0x000000009e3779b1
|
||||
DATA prime_avx<>+24(SB)/8, $0x000000009e3779b1
|
||||
GLOBL prime_avx<>(SB), RODATA|NOPTR, $32
|
||||
|
||||
// func accumAVX2(acc *[8]uint64, data *byte, key *byte, len uint64)
|
||||
// Requires: AVX, AVX2, MMX+
|
||||
TEXT ·accumAVX2(SB), NOSPLIT, $0-32
|
||||
MOVQ acc+0(FP), AX
|
||||
MOVQ data+8(FP), CX
|
||||
MOVQ key+16(FP), DX
|
||||
MOVQ key+16(FP), BX
|
||||
MOVQ len+24(FP), SI
|
||||
VMOVDQU (AX), Y1
|
||||
VMOVDQU 32(AX), Y2
|
||||
VMOVDQU prime_avx<>+0(SB), Y0
|
||||
|
||||
accum_large:
|
||||
CMPQ SI, $0x00000400
|
||||
JLE accum
|
||||
VMOVDQU (CX), Y3
|
||||
VMOVDQU 32(CX), Y6
|
||||
PREFETCHT0 512(CX)
|
||||
VPXOR (DX), Y3, Y4
|
||||
VPXOR 32(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 64(CX), Y3
|
||||
VMOVDQU 96(CX), Y6
|
||||
PREFETCHT0 576(CX)
|
||||
VPXOR 8(DX), Y3, Y4
|
||||
VPXOR 40(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 128(CX), Y3
|
||||
VMOVDQU 160(CX), Y6
|
||||
PREFETCHT0 640(CX)
|
||||
VPXOR 16(DX), Y3, Y4
|
||||
VPXOR 48(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 192(CX), Y3
|
||||
VMOVDQU 224(CX), Y6
|
||||
PREFETCHT0 704(CX)
|
||||
VPXOR 24(DX), Y3, Y4
|
||||
VPXOR 56(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 256(CX), Y3
|
||||
VMOVDQU 288(CX), Y6
|
||||
PREFETCHT0 768(CX)
|
||||
VPXOR 32(DX), Y3, Y4
|
||||
VPXOR 64(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 320(CX), Y3
|
||||
VMOVDQU 352(CX), Y6
|
||||
PREFETCHT0 832(CX)
|
||||
VPXOR 40(DX), Y3, Y4
|
||||
VPXOR 72(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 384(CX), Y3
|
||||
VMOVDQU 416(CX), Y6
|
||||
PREFETCHT0 896(CX)
|
||||
VPXOR 48(DX), Y3, Y4
|
||||
VPXOR 80(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 448(CX), Y3
|
||||
VMOVDQU 480(CX), Y6
|
||||
PREFETCHT0 960(CX)
|
||||
VPXOR 56(DX), Y3, Y4
|
||||
VPXOR 88(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 512(CX), Y3
|
||||
VMOVDQU 544(CX), Y6
|
||||
PREFETCHT0 1024(CX)
|
||||
VPXOR 64(DX), Y3, Y4
|
||||
VPXOR 96(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 576(CX), Y3
|
||||
VMOVDQU 608(CX), Y6
|
||||
PREFETCHT0 1088(CX)
|
||||
VPXOR 72(DX), Y3, Y4
|
||||
VPXOR 104(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 640(CX), Y3
|
||||
VMOVDQU 672(CX), Y6
|
||||
PREFETCHT0 1152(CX)
|
||||
VPXOR 80(DX), Y3, Y4
|
||||
VPXOR 112(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 704(CX), Y3
|
||||
VMOVDQU 736(CX), Y6
|
||||
PREFETCHT0 1216(CX)
|
||||
VPXOR 88(DX), Y3, Y4
|
||||
VPXOR 120(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 768(CX), Y3
|
||||
VMOVDQU 800(CX), Y6
|
||||
PREFETCHT0 1280(CX)
|
||||
VPXOR 96(DX), Y3, Y4
|
||||
VPXOR 128(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 832(CX), Y3
|
||||
VMOVDQU 864(CX), Y6
|
||||
PREFETCHT0 1344(CX)
|
||||
VPXOR 104(DX), Y3, Y4
|
||||
VPXOR 136(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 896(CX), Y3
|
||||
VMOVDQU 928(CX), Y6
|
||||
PREFETCHT0 1408(CX)
|
||||
VPXOR 112(DX), Y3, Y4
|
||||
VPXOR 144(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 960(CX), Y3
|
||||
VMOVDQU 992(CX), Y6
|
||||
PREFETCHT0 1472(CX)
|
||||
VPXOR 120(DX), Y3, Y4
|
||||
VPXOR 152(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
ADDQ $0x00000400, CX
|
||||
SUBQ $0x00000400, SI
|
||||
VPSRLQ $0x2f, Y1, Y3
|
||||
VPXOR Y1, Y3, Y3
|
||||
VPXOR 128(DX), Y3, Y3
|
||||
VPMULUDQ Y0, Y3, Y1
|
||||
VPSHUFD $0xf5, Y3, Y3
|
||||
VPMULUDQ Y0, Y3, Y3
|
||||
VPSLLQ $0x20, Y3, Y3
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPSRLQ $0x2f, Y2, Y3
|
||||
VPXOR Y2, Y3, Y3
|
||||
VPXOR 160(DX), Y3, Y3
|
||||
VPMULUDQ Y0, Y3, Y2
|
||||
VPSHUFD $0xf5, Y3, Y3
|
||||
VPMULUDQ Y0, Y3, Y3
|
||||
VPSLLQ $0x20, Y3, Y3
|
||||
VPADDQ Y2, Y3, Y2
|
||||
JMP accum_large
|
||||
|
||||
accum:
|
||||
CMPQ SI, $0x40
|
||||
JLE finalize
|
||||
VMOVDQU (CX), Y0
|
||||
VMOVDQU 32(CX), Y5
|
||||
VPXOR (BX), Y0, Y3
|
||||
VPXOR 32(BX), Y5, Y6
|
||||
VPSHUFD $0x31, Y3, Y4
|
||||
VPSHUFD $0x31, Y6, Y7
|
||||
VPMULUDQ Y3, Y4, Y3
|
||||
VPMULUDQ Y6, Y7, Y6
|
||||
VPSHUFD $0x4e, Y0, Y0
|
||||
VPSHUFD $0x4e, Y5, Y5
|
||||
VPADDQ Y1, Y0, Y1
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y2, Y5, Y2
|
||||
VPADDQ Y2, Y6, Y2
|
||||
ADDQ $0x00000040, CX
|
||||
SUBQ $0x00000040, SI
|
||||
ADDQ $0x00000008, BX
|
||||
JMP accum
|
||||
|
||||
finalize:
|
||||
CMPQ SI, $0x00
|
||||
JE return
|
||||
SUBQ $0x40, CX
|
||||
ADDQ SI, CX
|
||||
VMOVDQU (CX), Y0
|
||||
VMOVDQU 32(CX), Y5
|
||||
VPXOR 121(DX), Y0, Y3
|
||||
VPXOR 153(DX), Y5, Y6
|
||||
VPSHUFD $0x31, Y3, Y4
|
||||
VPSHUFD $0x31, Y6, Y7
|
||||
VPMULUDQ Y3, Y4, Y3
|
||||
VPMULUDQ Y6, Y7, Y6
|
||||
VPSHUFD $0x4e, Y0, Y0
|
||||
VPSHUFD $0x4e, Y5, Y5
|
||||
VPADDQ Y1, Y0, Y1
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y2, Y5, Y2
|
||||
VPADDQ Y2, Y6, Y2
|
||||
|
||||
return:
|
||||
VMOVDQU Y1, (AX)
|
||||
VMOVDQU Y2, 32(AX)
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
// func accumBlockAVX2(acc *[8]uint64, data *byte, key *byte)
|
||||
// Requires: AVX, AVX2
|
||||
TEXT ·accumBlockAVX2(SB), NOSPLIT, $0-24
|
||||
MOVQ acc+0(FP), AX
|
||||
MOVQ data+8(FP), CX
|
||||
MOVQ key+16(FP), DX
|
||||
VMOVDQU (AX), Y1
|
||||
VMOVDQU 32(AX), Y2
|
||||
VMOVDQU prime_avx<>+0(SB), Y0
|
||||
VMOVDQU (CX), Y3
|
||||
VMOVDQU 32(CX), Y6
|
||||
VPXOR (DX), Y3, Y4
|
||||
VPXOR 32(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 64(CX), Y3
|
||||
VMOVDQU 96(CX), Y6
|
||||
VPXOR 8(DX), Y3, Y4
|
||||
VPXOR 40(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 128(CX), Y3
|
||||
VMOVDQU 160(CX), Y6
|
||||
VPXOR 16(DX), Y3, Y4
|
||||
VPXOR 48(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 192(CX), Y3
|
||||
VMOVDQU 224(CX), Y6
|
||||
VPXOR 24(DX), Y3, Y4
|
||||
VPXOR 56(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 256(CX), Y3
|
||||
VMOVDQU 288(CX), Y6
|
||||
VPXOR 32(DX), Y3, Y4
|
||||
VPXOR 64(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 320(CX), Y3
|
||||
VMOVDQU 352(CX), Y6
|
||||
VPXOR 40(DX), Y3, Y4
|
||||
VPXOR 72(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 384(CX), Y3
|
||||
VMOVDQU 416(CX), Y6
|
||||
VPXOR 48(DX), Y3, Y4
|
||||
VPXOR 80(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 448(CX), Y3
|
||||
VMOVDQU 480(CX), Y6
|
||||
VPXOR 56(DX), Y3, Y4
|
||||
VPXOR 88(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 512(CX), Y3
|
||||
VMOVDQU 544(CX), Y6
|
||||
VPXOR 64(DX), Y3, Y4
|
||||
VPXOR 96(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 576(CX), Y3
|
||||
VMOVDQU 608(CX), Y6
|
||||
VPXOR 72(DX), Y3, Y4
|
||||
VPXOR 104(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 640(CX), Y3
|
||||
VMOVDQU 672(CX), Y6
|
||||
VPXOR 80(DX), Y3, Y4
|
||||
VPXOR 112(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 704(CX), Y3
|
||||
VMOVDQU 736(CX), Y6
|
||||
VPXOR 88(DX), Y3, Y4
|
||||
VPXOR 120(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 768(CX), Y3
|
||||
VMOVDQU 800(CX), Y6
|
||||
VPXOR 96(DX), Y3, Y4
|
||||
VPXOR 128(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 832(CX), Y3
|
||||
VMOVDQU 864(CX), Y6
|
||||
VPXOR 104(DX), Y3, Y4
|
||||
VPXOR 136(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 896(CX), Y3
|
||||
VMOVDQU 928(CX), Y6
|
||||
VPXOR 112(DX), Y3, Y4
|
||||
VPXOR 144(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VMOVDQU 960(CX), Y3
|
||||
VMOVDQU 992(CX), Y6
|
||||
VPXOR 120(DX), Y3, Y4
|
||||
VPXOR 152(DX), Y6, Y7
|
||||
VPSHUFD $0x31, Y4, Y5
|
||||
VPSHUFD $0x31, Y7, Y8
|
||||
VPMULUDQ Y4, Y5, Y4
|
||||
VPMULUDQ Y7, Y8, Y7
|
||||
VPSHUFD $0x4e, Y3, Y3
|
||||
VPSHUFD $0x4e, Y6, Y6
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPADDQ Y1, Y4, Y1
|
||||
VPADDQ Y2, Y6, Y2
|
||||
VPADDQ Y2, Y7, Y2
|
||||
VPSRLQ $0x2f, Y1, Y3
|
||||
VPXOR Y1, Y3, Y3
|
||||
VPXOR 128(DX), Y3, Y3
|
||||
VPMULUDQ Y0, Y3, Y1
|
||||
VPSHUFD $0xf5, Y3, Y3
|
||||
VPMULUDQ Y0, Y3, Y3
|
||||
VPSLLQ $0x20, Y3, Y3
|
||||
VPADDQ Y1, Y3, Y1
|
||||
VPSRLQ $0x2f, Y2, Y3
|
||||
VPXOR Y2, Y3, Y3
|
||||
VPXOR 160(DX), Y3, Y3
|
||||
VPMULUDQ Y0, Y3, Y2
|
||||
VPSHUFD $0xf5, Y3, Y3
|
||||
VPMULUDQ Y0, Y3, Y3
|
||||
VPSLLQ $0x20, Y3, Y3
|
||||
VPADDQ Y2, Y3, Y2
|
||||
VMOVDQU Y1, (AX)
|
||||
VMOVDQU Y2, 32(AX)
|
||||
VZEROUPPER
|
||||
RET
|
1236
vendor/github.com/zeebo/xxh3/accum_vector_sse_amd64.s
generated
vendored
1236
vendor/github.com/zeebo/xxh3/accum_vector_sse_amd64.s
generated
vendored
File diff suppressed because it is too large
Load Diff
97
vendor/github.com/zeebo/xxh3/consts.go
generated
vendored
97
vendor/github.com/zeebo/xxh3/consts.go
generated
vendored
@ -1,97 +0,0 @@
|
||||
package xxh3
|
||||
|
||||
const (
|
||||
_stripe = 64
|
||||
_block = 1024
|
||||
|
||||
prime32_1 = 2654435761
|
||||
prime32_2 = 2246822519
|
||||
prime32_3 = 3266489917
|
||||
|
||||
prime64_1 = 11400714785074694791
|
||||
prime64_2 = 14029467366897019727
|
||||
prime64_3 = 1609587929392839161
|
||||
prime64_4 = 9650029242287828579
|
||||
prime64_5 = 2870177450012600261
|
||||
)
|
||||
|
||||
var key = ptr(&[...]u8{
|
||||
0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe /* 8 */, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, /* 16 */
|
||||
0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb /* 24 */, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, /* 32 */
|
||||
0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78 /* 40 */, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, /* 48 */
|
||||
0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e /* 56 */, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, /* 64 */
|
||||
0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb /* 72 */, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, /* 80 */
|
||||
0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e /* 88 */, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, /* 96 */
|
||||
0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f /* 104 */, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, /* 112 */
|
||||
0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31 /* 120 */, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, /* 128 */
|
||||
0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3 /* 136 */, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, /* 144 */
|
||||
0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49 /* 152 */, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, /* 160 */
|
||||
0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc /* 168 */, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, /* 176 */
|
||||
0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28 /* 184 */, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, /* 192 */
|
||||
})
|
||||
|
||||
const (
|
||||
key64_000 u64 = 0xbe4ba423396cfeb8
|
||||
key64_008 u64 = 0x1cad21f72c81017c
|
||||
key64_016 u64 = 0xdb979083e96dd4de
|
||||
key64_024 u64 = 0x1f67b3b7a4a44072
|
||||
key64_032 u64 = 0x78e5c0cc4ee679cb
|
||||
key64_040 u64 = 0x2172ffcc7dd05a82
|
||||
key64_048 u64 = 0x8e2443f7744608b8
|
||||
key64_056 u64 = 0x4c263a81e69035e0
|
||||
key64_064 u64 = 0xcb00c391bb52283c
|
||||
key64_072 u64 = 0xa32e531b8b65d088
|
||||
key64_080 u64 = 0x4ef90da297486471
|
||||
key64_088 u64 = 0xd8acdea946ef1938
|
||||
key64_096 u64 = 0x3f349ce33f76faa8
|
||||
key64_104 u64 = 0x1d4f0bc7c7bbdcf9
|
||||
key64_112 u64 = 0x3159b4cd4be0518a
|
||||
key64_120 u64 = 0x647378d9c97e9fc8
|
||||
key64_128 u64 = 0xc3ebd33483acc5ea
|
||||
key64_136 u64 = 0xeb6313faffa081c5
|
||||
key64_144 u64 = 0x49daf0b751dd0d17
|
||||
key64_152 u64 = 0x9e68d429265516d3
|
||||
key64_160 u64 = 0xfca1477d58be162b
|
||||
key64_168 u64 = 0xce31d07ad1b8f88f
|
||||
key64_176 u64 = 0x280416958f3acb45
|
||||
key64_184 u64 = 0x7e404bbbcafbd7af
|
||||
|
||||
key64_103 u64 = 0x4f0bc7c7bbdcf93f
|
||||
key64_111 u64 = 0x59b4cd4be0518a1d
|
||||
key64_119 u64 = 0x7378d9c97e9fc831
|
||||
key64_127 u64 = 0xebd33483acc5ea64
|
||||
|
||||
key64_121 u64 = 0xea647378d9c97e9f
|
||||
key64_129 u64 = 0xc5c3ebd33483acc5
|
||||
key64_137 u64 = 0x17eb6313faffa081
|
||||
key64_145 u64 = 0xd349daf0b751dd0d
|
||||
key64_153 u64 = 0x2b9e68d429265516
|
||||
key64_161 u64 = 0x8ffca1477d58be16
|
||||
key64_169 u64 = 0x45ce31d07ad1b8f8
|
||||
key64_177 u64 = 0xaf280416958f3acb
|
||||
|
||||
key64_011 = 0x6dd4de1cad21f72c
|
||||
key64_019 = 0xa44072db979083e9
|
||||
key64_027 = 0xe679cb1f67b3b7a4
|
||||
key64_035 = 0xd05a8278e5c0cc4e
|
||||
key64_043 = 0x4608b82172ffcc7d
|
||||
key64_051 = 0x9035e08e2443f774
|
||||
key64_059 = 0x52283c4c263a81e6
|
||||
key64_067 = 0x65d088cb00c391bb
|
||||
|
||||
key64_117 = 0xd9c97e9fc83159b4
|
||||
key64_125 = 0x3483acc5ea647378
|
||||
key64_133 = 0xfaffa081c5c3ebd3
|
||||
key64_141 = 0xb751dd0d17eb6313
|
||||
key64_149 = 0x29265516d349daf0
|
||||
key64_157 = 0x7d58be162b9e68d4
|
||||
key64_165 = 0x7ad1b8f88ffca147
|
||||
key64_173 = 0x958f3acb45ce31d0
|
||||
)
|
||||
|
||||
const (
|
||||
key32_000 u32 = 0xbe4ba423
|
||||
key32_004 u32 = 0x396cfeb8
|
||||
key32_008 u32 = 0x1cad21f7
|
||||
key32_012 u32 = 0x2c81017c
|
||||
)
|
253
vendor/github.com/zeebo/xxh3/hash128.go
generated
vendored
253
vendor/github.com/zeebo/xxh3/hash128.go
generated
vendored
@ -1,253 +0,0 @@
|
||||
package xxh3
|
||||
|
||||
import (
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
// Hash128 returns the 128-bit hash of the byte slice.
|
||||
func Hash128(b []byte) Uint128 {
|
||||
return hashAny128(*(*str)(ptr(&b)))
|
||||
}
|
||||
|
||||
// HashString128 returns the 128-bit hash of the string slice.
|
||||
func HashString128(s string) Uint128 {
|
||||
return hashAny128(*(*str)(ptr(&s)))
|
||||
}
|
||||
|
||||
func hashAny128(s str) (acc u128) {
|
||||
p, l := s.p, s.l
|
||||
|
||||
switch {
|
||||
case l <= 16:
|
||||
switch {
|
||||
case l > 8: // 9-16
|
||||
const bitflipl = key64_032 ^ key64_040
|
||||
const bitfliph = key64_048 ^ key64_056
|
||||
|
||||
input_lo := readU64(p, 0)
|
||||
input_hi := readU64(p, ui(l)-8)
|
||||
|
||||
m128_h, m128_l := bits.Mul64(input_lo^input_hi^bitflipl, prime64_1)
|
||||
|
||||
m128_l += uint64(l-1) << 54
|
||||
input_hi ^= bitfliph
|
||||
|
||||
m128_h += input_hi + uint64(uint32(input_hi))*(prime32_2-1)
|
||||
|
||||
m128_l ^= bits.ReverseBytes64(m128_h)
|
||||
|
||||
acc.Hi, acc.Lo = bits.Mul64(m128_l, prime64_2)
|
||||
acc.Hi += m128_h * prime64_2
|
||||
|
||||
acc.Lo = xxh3Avalanche(acc.Lo)
|
||||
acc.Hi = xxh3Avalanche(acc.Hi)
|
||||
|
||||
return acc
|
||||
|
||||
case l > 3: // 4-8
|
||||
const bitflip = key64_016 ^ key64_024
|
||||
|
||||
input_lo := readU32(p, 0)
|
||||
input_hi := readU32(p, ui(l)-4)
|
||||
input_64 := u64(input_lo) + u64(input_hi)<<32
|
||||
keyed := input_64 ^ bitflip
|
||||
|
||||
acc.Hi, acc.Lo = bits.Mul64(keyed, prime64_1+(uint64(l)<<2))
|
||||
|
||||
acc.Hi += acc.Lo << 1
|
||||
acc.Lo ^= acc.Hi >> 3
|
||||
|
||||
acc.Lo ^= acc.Lo >> 35
|
||||
acc.Lo *= 0x9fb21c651e98df25
|
||||
acc.Lo ^= acc.Lo >> 28
|
||||
acc.Hi = xxh3Avalanche(acc.Hi)
|
||||
|
||||
return acc
|
||||
|
||||
case l == 3: // 3
|
||||
c12 := u64(readU16(p, 0))
|
||||
c3 := u64(readU8(p, 2))
|
||||
acc.Lo = c12<<16 + c3 + 3<<8
|
||||
|
||||
case l > 1: // 2
|
||||
c12 := u64(readU16(p, 0))
|
||||
acc.Lo = c12*(1<<24+1)>>8 + 2<<8
|
||||
|
||||
case l == 1: // 1
|
||||
c1 := u64(readU8(p, 0))
|
||||
acc.Lo = c1*(1<<24+1<<16+1) + 1<<8
|
||||
|
||||
default: // 0
|
||||
return u128{0x99aa06d3014798d8, 0x6001c324468d497f}
|
||||
}
|
||||
|
||||
acc.Hi = uint64(bits.RotateLeft32(bits.ReverseBytes32(uint32(acc.Lo)), 13))
|
||||
acc.Lo ^= uint64(key32_000 ^ key32_004)
|
||||
acc.Hi ^= uint64(key32_008 ^ key32_012)
|
||||
|
||||
acc.Lo = xxh64AvalancheSmall(acc.Lo)
|
||||
acc.Hi = xxh64AvalancheSmall(acc.Hi)
|
||||
|
||||
return acc
|
||||
|
||||
case l <= 128:
|
||||
acc.Lo = u64(l) * prime64_1
|
||||
|
||||
if l > 32 {
|
||||
if l > 64 {
|
||||
if l > 96 {
|
||||
in8, in7 := readU64(p, ui(l)-8*8), readU64(p, ui(l)-7*8)
|
||||
i6, i7 := readU64(p, 6*8), readU64(p, 7*8)
|
||||
|
||||
acc.Hi += mulFold64(in8^key64_112, in7^key64_120)
|
||||
acc.Hi ^= i6 + i7
|
||||
acc.Lo += mulFold64(i6^key64_096, i7^key64_104)
|
||||
acc.Lo ^= in8 + in7
|
||||
|
||||
} // 96
|
||||
|
||||
in6, in5 := readU64(p, ui(l)-6*8), readU64(p, ui(l)-5*8)
|
||||
i4, i5 := readU64(p, 4*8), readU64(p, 5*8)
|
||||
|
||||
acc.Hi += mulFold64(in6^key64_080, in5^key64_088)
|
||||
acc.Hi ^= i4 + i5
|
||||
acc.Lo += mulFold64(i4^key64_064, i5^key64_072)
|
||||
acc.Lo ^= in6 + in5
|
||||
|
||||
} // 64
|
||||
|
||||
in4, in3 := readU64(p, ui(l)-4*8), readU64(p, ui(l)-3*8)
|
||||
i2, i3 := readU64(p, 2*8), readU64(p, 3*8)
|
||||
|
||||
acc.Hi += mulFold64(in4^key64_048, in3^key64_056)
|
||||
acc.Hi ^= i2 + i3
|
||||
acc.Lo += mulFold64(i2^key64_032, i3^key64_040)
|
||||
acc.Lo ^= in4 + in3
|
||||
|
||||
} // 32
|
||||
|
||||
in2, in1 := readU64(p, ui(l)-2*8), readU64(p, ui(l)-1*8)
|
||||
i0, i1 := readU64(p, 0*8), readU64(p, 1*8)
|
||||
|
||||
acc.Hi += mulFold64(in2^key64_016, in1^key64_024)
|
||||
acc.Hi ^= i0 + i1
|
||||
acc.Lo += mulFold64(i0^key64_000, i1^key64_008)
|
||||
acc.Lo ^= in2 + in1
|
||||
|
||||
acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+(u64(l)*prime64_2), acc.Hi+acc.Lo
|
||||
|
||||
acc.Hi = -xxh3Avalanche(acc.Hi)
|
||||
acc.Lo = xxh3Avalanche(acc.Lo)
|
||||
|
||||
return acc
|
||||
|
||||
case l <= 240:
|
||||
acc.Lo = u64(l) * prime64_1
|
||||
|
||||
{
|
||||
i0, i1, i2, i3 := readU64(p, 0*8), readU64(p, 1*8), readU64(p, 2*8), readU64(p, 3*8)
|
||||
|
||||
acc.Hi += mulFold64(i2^key64_016, i3^key64_024)
|
||||
acc.Hi ^= i0 + i1
|
||||
acc.Lo += mulFold64(i0^key64_000, i1^key64_008)
|
||||
acc.Lo ^= i2 + i3
|
||||
}
|
||||
|
||||
{
|
||||
i0, i1, i2, i3 := readU64(p, 4*8), readU64(p, 5*8), readU64(p, 6*8), readU64(p, 7*8)
|
||||
|
||||
acc.Hi += mulFold64(i2^key64_048, i3^key64_056)
|
||||
acc.Hi ^= i0 + i1
|
||||
acc.Lo += mulFold64(i0^key64_032, i1^key64_040)
|
||||
acc.Lo ^= i2 + i3
|
||||
}
|
||||
|
||||
{
|
||||
i0, i1, i2, i3 := readU64(p, 8*8), readU64(p, 9*8), readU64(p, 10*8), readU64(p, 11*8)
|
||||
|
||||
acc.Hi += mulFold64(i2^key64_080, i3^key64_088)
|
||||
acc.Hi ^= i0 + i1
|
||||
acc.Lo += mulFold64(i0^key64_064, i1^key64_072)
|
||||
acc.Lo ^= i2 + i3
|
||||
}
|
||||
|
||||
{
|
||||
i0, i1, i2, i3 := readU64(p, 12*8), readU64(p, 13*8), readU64(p, 14*8), readU64(p, 15*8)
|
||||
|
||||
acc.Hi += mulFold64(i2^key64_112, i3^key64_120)
|
||||
acc.Hi ^= i0 + i1
|
||||
acc.Lo += mulFold64(i0^key64_096, i1^key64_104)
|
||||
acc.Lo ^= i2 + i3
|
||||
}
|
||||
|
||||
// avalanche
|
||||
acc.Hi = xxh3Avalanche(acc.Hi)
|
||||
acc.Lo = xxh3Avalanche(acc.Lo)
|
||||
|
||||
// trailing groups after 128
|
||||
top := ui(l) &^ 31
|
||||
for i := ui(4 * 32); i < top; i += 32 {
|
||||
i0, i1, i2, i3 := readU64(p, i+0), readU64(p, i+8), readU64(p, i+16), readU64(p, i+24)
|
||||
k0, k1, k2, k3 := readU64(key, i-125), readU64(key, i-117), readU64(key, i-109), readU64(key, i-101)
|
||||
|
||||
acc.Hi += mulFold64(i2^k2, i3^k3)
|
||||
acc.Hi ^= i0 + i1
|
||||
acc.Lo += mulFold64(i0^k0, i1^k1)
|
||||
acc.Lo ^= i2 + i3
|
||||
}
|
||||
|
||||
// last 32 bytes
|
||||
{
|
||||
i0, i1, i2, i3 := readU64(p, ui(l)-32), readU64(p, ui(l)-24), readU64(p, ui(l)-16), readU64(p, ui(l)-8)
|
||||
|
||||
acc.Hi += mulFold64(i0^key64_119, i1^key64_127)
|
||||
acc.Hi ^= i2 + i3
|
||||
acc.Lo += mulFold64(i2^key64_103, i3^key64_111)
|
||||
acc.Lo ^= i0 + i1
|
||||
}
|
||||
|
||||
acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+(u64(l)*prime64_2), acc.Hi+acc.Lo
|
||||
|
||||
acc.Hi = -xxh3Avalanche(acc.Hi)
|
||||
acc.Lo = xxh3Avalanche(acc.Lo)
|
||||
|
||||
return acc
|
||||
|
||||
default:
|
||||
acc.Lo = u64(l) * prime64_1
|
||||
acc.Hi = ^(u64(l) * prime64_2)
|
||||
|
||||
accs := [8]u64{
|
||||
prime32_3, prime64_1, prime64_2, prime64_3,
|
||||
prime64_4, prime32_2, prime64_5, prime32_1,
|
||||
}
|
||||
|
||||
if hasAVX512 && l >= avx512Switch {
|
||||
accumAVX512(&accs, p, key, u64(l))
|
||||
} else if hasAVX2 {
|
||||
accumAVX2(&accs, p, key, u64(l))
|
||||
} else if hasSSE2 {
|
||||
accumSSE(&accs, p, key, u64(l))
|
||||
} else {
|
||||
accumScalar(&accs, p, key, u64(l))
|
||||
}
|
||||
|
||||
// merge accs
|
||||
acc.Lo += mulFold64(accs[0]^key64_011, accs[1]^key64_019)
|
||||
acc.Hi += mulFold64(accs[0]^key64_117, accs[1]^key64_125)
|
||||
|
||||
acc.Lo += mulFold64(accs[2]^key64_027, accs[3]^key64_035)
|
||||
acc.Hi += mulFold64(accs[2]^key64_133, accs[3]^key64_141)
|
||||
|
||||
acc.Lo += mulFold64(accs[4]^key64_043, accs[5]^key64_051)
|
||||
acc.Hi += mulFold64(accs[4]^key64_149, accs[5]^key64_157)
|
||||
|
||||
acc.Lo += mulFold64(accs[6]^key64_059, accs[7]^key64_067)
|
||||
acc.Hi += mulFold64(accs[6]^key64_165, accs[7]^key64_173)
|
||||
|
||||
acc.Lo = xxh3Avalanche(acc.Lo)
|
||||
acc.Hi = xxh3Avalanche(acc.Hi)
|
||||
|
||||
return acc
|
||||
}
|
||||
}
|
264
vendor/github.com/zeebo/xxh3/hash128_seed.go
generated
vendored
264
vendor/github.com/zeebo/xxh3/hash128_seed.go
generated
vendored
@ -1,264 +0,0 @@
|
||||
package xxh3
|
||||
|
||||
import (
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
// Hash128Seed returns the 128-bit hash of the byte slice.
|
||||
func Hash128Seed(b []byte, seed uint64) Uint128 {
|
||||
return hashAny128Seed(*(*str)(ptr(&b)), seed)
|
||||
}
|
||||
|
||||
// HashString128Seed returns the 128-bit hash of the string slice.
|
||||
func HashString128Seed(s string, seed uint64) Uint128 {
|
||||
return hashAny128Seed(*(*str)(ptr(&s)), seed)
|
||||
}
|
||||
|
||||
func hashAny128Seed(s str, seed uint64) (acc u128) {
|
||||
p, l := s.p, s.l
|
||||
|
||||
switch {
|
||||
case l <= 16:
|
||||
switch {
|
||||
case l > 8: // 9-16
|
||||
bitflipl := (key64_032 ^ key64_040) - seed
|
||||
bitfliph := (key64_048 ^ key64_056) + seed
|
||||
|
||||
input_lo := readU64(p, 0)
|
||||
input_hi := readU64(p, ui(l)-8)
|
||||
|
||||
m128_h, m128_l := bits.Mul64(input_lo^input_hi^bitflipl, prime64_1)
|
||||
|
||||
m128_l += uint64(l-1) << 54
|
||||
input_hi ^= bitfliph
|
||||
|
||||
m128_h += input_hi + uint64(uint32(input_hi))*(prime32_2-1)
|
||||
|
||||
m128_l ^= bits.ReverseBytes64(m128_h)
|
||||
|
||||
acc.Hi, acc.Lo = bits.Mul64(m128_l, prime64_2)
|
||||
acc.Hi += m128_h * prime64_2
|
||||
|
||||
acc.Lo = xxh3Avalanche(acc.Lo)
|
||||
acc.Hi = xxh3Avalanche(acc.Hi)
|
||||
|
||||
return acc
|
||||
|
||||
case l > 3: // 4-8
|
||||
seed ^= u64(bits.ReverseBytes32(u32(seed))) << 32
|
||||
bitflip := (key64_016 ^ key64_024) + seed
|
||||
input_lo := readU32(p, 0)
|
||||
input_hi := readU32(p, ui(l)-4)
|
||||
input_64 := u64(input_lo) + u64(input_hi)<<32
|
||||
keyed := input_64 ^ bitflip
|
||||
|
||||
acc.Hi, acc.Lo = bits.Mul64(keyed, prime64_1+(uint64(l)<<2))
|
||||
|
||||
acc.Hi += acc.Lo << 1
|
||||
acc.Lo ^= acc.Hi >> 3
|
||||
|
||||
acc.Lo ^= acc.Lo >> 35
|
||||
acc.Lo *= 0x9fb21c651e98df25
|
||||
acc.Lo ^= acc.Lo >> 28
|
||||
acc.Hi = xxh3Avalanche(acc.Hi)
|
||||
|
||||
return acc
|
||||
|
||||
case l == 3: // 3
|
||||
c12 := u64(readU16(p, 0))
|
||||
c3 := u64(readU8(p, 2))
|
||||
acc.Lo = c12<<16 + c3 + 3<<8
|
||||
|
||||
case l > 1: // 2
|
||||
c12 := u64(readU16(p, 0))
|
||||
acc.Lo = c12*(1<<24+1)>>8 + 2<<8
|
||||
|
||||
case l == 1: // 1
|
||||
c1 := u64(readU8(p, 0))
|
||||
acc.Lo = c1*(1<<24+1<<16+1) + 1<<8
|
||||
|
||||
default: // 0
|
||||
bitflipl := key64_064 ^ key64_072 ^ seed
|
||||
bitfliph := key64_080 ^ key64_088 ^ seed
|
||||
return u128{Lo: xxh64AvalancheFull(bitflipl), Hi: xxh64AvalancheFull(bitfliph)}
|
||||
}
|
||||
|
||||
acc.Hi = uint64(bits.RotateLeft32(bits.ReverseBytes32(uint32(acc.Lo)), 13))
|
||||
acc.Lo ^= uint64(key32_000^key32_004) + seed
|
||||
acc.Hi ^= uint64(key32_008^key32_012) - seed
|
||||
|
||||
acc.Lo = xxh64AvalancheFull(acc.Lo)
|
||||
acc.Hi = xxh64AvalancheFull(acc.Hi)
|
||||
|
||||
return acc
|
||||
|
||||
case l <= 128:
|
||||
acc.Lo = u64(l) * prime64_1
|
||||
|
||||
if l > 32 {
|
||||
if l > 64 {
|
||||
if l > 96 {
|
||||
in8, in7 := readU64(p, ui(l)-8*8), readU64(p, ui(l)-7*8)
|
||||
i6, i7 := readU64(p, 6*8), readU64(p, 7*8)
|
||||
|
||||
acc.Hi += mulFold64(in8^(key64_112+seed), in7^(key64_120-seed))
|
||||
acc.Hi ^= i6 + i7
|
||||
acc.Lo += mulFold64(i6^(key64_096+seed), i7^(key64_104-seed))
|
||||
acc.Lo ^= in8 + in7
|
||||
|
||||
} // 96
|
||||
|
||||
in6, in5 := readU64(p, ui(l)-6*8), readU64(p, ui(l)-5*8)
|
||||
i4, i5 := readU64(p, 4*8), readU64(p, 5*8)
|
||||
|
||||
acc.Hi += mulFold64(in6^(key64_080+seed), in5^(key64_088-seed))
|
||||
acc.Hi ^= i4 + i5
|
||||
acc.Lo += mulFold64(i4^(key64_064+seed), i5^(key64_072-seed))
|
||||
acc.Lo ^= in6 + in5
|
||||
|
||||
} // 64
|
||||
|
||||
in4, in3 := readU64(p, ui(l)-4*8), readU64(p, ui(l)-3*8)
|
||||
i2, i3 := readU64(p, 2*8), readU64(p, 3*8)
|
||||
|
||||
acc.Hi += mulFold64(in4^(key64_048+seed), in3^(key64_056-seed))
|
||||
acc.Hi ^= i2 + i3
|
||||
acc.Lo += mulFold64(i2^(key64_032+seed), i3^(key64_040-seed))
|
||||
acc.Lo ^= in4 + in3
|
||||
|
||||
} // 32
|
||||
|
||||
in2, in1 := readU64(p, ui(l)-2*8), readU64(p, ui(l)-1*8)
|
||||
i0, i1 := readU64(p, 0*8), readU64(p, 1*8)
|
||||
|
||||
acc.Hi += mulFold64(in2^(key64_016+seed), in1^(key64_024-seed))
|
||||
acc.Hi ^= i0 + i1
|
||||
acc.Lo += mulFold64(i0^(key64_000+seed), i1^(key64_008-seed))
|
||||
acc.Lo ^= in2 + in1
|
||||
|
||||
acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+((u64(l)-seed)*prime64_2), acc.Hi+acc.Lo
|
||||
|
||||
acc.Hi = -xxh3Avalanche(acc.Hi)
|
||||
acc.Lo = xxh3Avalanche(acc.Lo)
|
||||
|
||||
return acc
|
||||
|
||||
case l <= 240:
|
||||
acc.Lo = u64(l) * prime64_1
|
||||
|
||||
{
|
||||
i0, i1, i2, i3 := readU64(p, 0*8), readU64(p, 1*8), readU64(p, 2*8), readU64(p, 3*8)
|
||||
|
||||
acc.Hi += mulFold64(i2^(key64_016+seed), i3^(key64_024-seed))
|
||||
acc.Hi ^= i0 + i1
|
||||
acc.Lo += mulFold64(i0^(key64_000+seed), i1^(key64_008-seed))
|
||||
acc.Lo ^= i2 + i3
|
||||
}
|
||||
|
||||
{
|
||||
i0, i1, i2, i3 := readU64(p, 4*8), readU64(p, 5*8), readU64(p, 6*8), readU64(p, 7*8)
|
||||
|
||||
acc.Hi += mulFold64(i2^(key64_048+seed), i3^(key64_056-seed))
|
||||
acc.Hi ^= i0 + i1
|
||||
acc.Lo += mulFold64(i0^(key64_032+seed), i1^(key64_040-seed))
|
||||
acc.Lo ^= i2 + i3
|
||||
}
|
||||
|
||||
{
|
||||
i0, i1, i2, i3 := readU64(p, 8*8), readU64(p, 9*8), readU64(p, 10*8), readU64(p, 11*8)
|
||||
|
||||
acc.Hi += mulFold64(i2^(key64_080+seed), i3^(key64_088-seed))
|
||||
acc.Hi ^= i0 + i1
|
||||
acc.Lo += mulFold64(i0^(key64_064+seed), i1^(key64_072-seed))
|
||||
acc.Lo ^= i2 + i3
|
||||
}
|
||||
|
||||
{
|
||||
i0, i1, i2, i3 := readU64(p, 12*8), readU64(p, 13*8), readU64(p, 14*8), readU64(p, 15*8)
|
||||
|
||||
acc.Hi += mulFold64(i2^(key64_112+seed), i3^(key64_120-seed))
|
||||
acc.Hi ^= i0 + i1
|
||||
acc.Lo += mulFold64(i0^(key64_096+seed), i1^(key64_104-seed))
|
||||
acc.Lo ^= i2 + i3
|
||||
}
|
||||
|
||||
// avalanche
|
||||
acc.Hi = xxh3Avalanche(acc.Hi)
|
||||
acc.Lo = xxh3Avalanche(acc.Lo)
|
||||
|
||||
// trailing groups after 128
|
||||
top := ui(l) &^ 31
|
||||
for i := ui(4 * 32); i < top; i += 32 {
|
||||
i0, i1, i2, i3 := readU64(p, i+0), readU64(p, i+8), readU64(p, i+16), readU64(p, i+24)
|
||||
k0, k1, k2, k3 := readU64(key, i-125)+seed, readU64(key, i-117)-seed, readU64(key, i-109)+seed, readU64(key, i-101)-seed
|
||||
|
||||
acc.Hi += mulFold64(i2^k2, i3^k3)
|
||||
acc.Hi ^= i0 + i1
|
||||
acc.Lo += mulFold64(i0^k0, i1^k1)
|
||||
acc.Lo ^= i2 + i3
|
||||
}
|
||||
|
||||
// last 32 bytes
|
||||
{
|
||||
i0, i1, i2, i3 := readU64(p, ui(l)-32), readU64(p, ui(l)-24), readU64(p, ui(l)-16), readU64(p, ui(l)-8)
|
||||
|
||||
seed := 0 - seed
|
||||
acc.Hi += mulFold64(i0^(key64_119+seed), i1^(key64_127-seed))
|
||||
acc.Hi ^= i2 + i3
|
||||
acc.Lo += mulFold64(i2^(key64_103+seed), i3^(key64_111-seed))
|
||||
acc.Lo ^= i0 + i1
|
||||
}
|
||||
|
||||
acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+((u64(l)-seed)*prime64_2), acc.Hi+acc.Lo
|
||||
|
||||
acc.Hi = -xxh3Avalanche(acc.Hi)
|
||||
acc.Lo = xxh3Avalanche(acc.Lo)
|
||||
|
||||
return acc
|
||||
|
||||
default:
|
||||
acc.Lo = u64(l) * prime64_1
|
||||
acc.Hi = ^(u64(l) * prime64_2)
|
||||
|
||||
secret := key
|
||||
if seed != 0 {
|
||||
secret = ptr(&[secretSize]byte{})
|
||||
initSecret(secret, seed)
|
||||
}
|
||||
|
||||
accs := [8]u64{
|
||||
prime32_3, prime64_1, prime64_2, prime64_3,
|
||||
prime64_4, prime32_2, prime64_5, prime32_1,
|
||||
}
|
||||
|
||||
if hasAVX512 && l >= avx512Switch {
|
||||
accumAVX512(&accs, p, secret, u64(l))
|
||||
} else if hasAVX2 {
|
||||
accumAVX2(&accs, p, secret, u64(l))
|
||||
} else if hasSSE2 {
|
||||
accumSSE(&accs, p, secret, u64(l))
|
||||
} else {
|
||||
accumScalar(&accs, p, secret, u64(l))
|
||||
}
|
||||
|
||||
// merge accs
|
||||
const hi_off = 117 - 11
|
||||
|
||||
acc.Lo += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19))
|
||||
acc.Hi += mulFold64(accs[0]^readU64(secret, 11+hi_off), accs[1]^readU64(secret, 19+hi_off))
|
||||
|
||||
acc.Lo += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35))
|
||||
acc.Hi += mulFold64(accs[2]^readU64(secret, 27+hi_off), accs[3]^readU64(secret, 35+hi_off))
|
||||
|
||||
acc.Lo += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51))
|
||||
acc.Hi += mulFold64(accs[4]^readU64(secret, 43+hi_off), accs[5]^readU64(secret, 51+hi_off))
|
||||
|
||||
acc.Lo += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67))
|
||||
acc.Hi += mulFold64(accs[6]^readU64(secret, 59+hi_off), accs[7]^readU64(secret, 67+hi_off))
|
||||
|
||||
acc.Lo = xxh3Avalanche(acc.Lo)
|
||||
acc.Hi = xxh3Avalanche(acc.Hi)
|
||||
|
||||
return acc
|
||||
}
|
||||
}
|
126
vendor/github.com/zeebo/xxh3/hash64.go
generated
vendored
126
vendor/github.com/zeebo/xxh3/hash64.go
generated
vendored
@ -1,126 +0,0 @@
|
||||
package xxh3
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// Hash returns the 64-bit xxh3 hash of the byte slice.
func Hash(b []byte) uint64 {
	// Reinterpret *[]byte as *str (data pointer first word, then
	// length) so the data is hashed without copying.
	return hashAny(*(*str)(ptr(&b)))
}
|
||||
|
||||
// HashString returns the 64-bit xxh3 hash of the string s.
func HashString(s string) uint64 {
	// Reinterpret *string as *str to hash the bytes without a copy.
	return hashAny(*(*str)(ptr(&s)))
}
|
||||
|
||||
// hashAny computes the 64-bit xxh3 hash of the input, dispatching on
// length: <=16 bytes use small per-size mixers, <=128 a nested ladder of
// 16-byte folds, <=240 a fixed 128-byte pass plus trailing groups, and
// larger inputs the vectorized block accumulators.
func hashAny(s str) (acc u64) {
	p, l := s.p, s.l

	switch {
	case l <= 16:
		switch {
		case l > 8: // 9-16: fold both (overlapping) 8-byte halves.
			inputlo := readU64(p, 0) ^ (key64_024 ^ key64_032)
			inputhi := readU64(p, ui(l)-8) ^ (key64_040 ^ key64_048)
			folded := mulFold64(inputlo, inputhi)
			return xxh3Avalanche(u64(l) + bits.ReverseBytes64(inputlo) + inputhi + folded)

		case l > 3: // 4-8: combine the two (overlapping) 4-byte halves.
			input1 := readU32(p, 0)
			input2 := readU32(p, ui(l)-4)
			input64 := u64(input2) + u64(input1)<<32
			keyed := input64 ^ (key64_008 ^ key64_016)
			return rrmxmx(keyed, u64(l))

		case l == 3: // 3: pack bytes plus the length tag (3<<8).
			c12 := u64(readU16(p, 0))
			c3 := u64(readU8(p, 2))
			acc = c12<<16 + c3 + 3<<8

		case l > 1: // 2: spread the two bytes plus the length tag.
			c12 := u64(readU16(p, 0))
			acc = c12*(1<<24+1)>>8 + 2<<8

		case l == 1: // 1: replicate the single byte plus the length tag.
			c1 := u64(readU8(p, 0))
			acc = c1*(1<<24+1<<16+1) + 1<<8

		default: // 0: precomputed constant for empty input.
			return 0x2d06800538d394c2 // xxh_avalanche(key64_056 ^ key64_064)
		}

		// Shared finalization for the 1-3 byte cases above.
		acc ^= u64(key32_000 ^ key32_004)
		return xxhAvalancheSmall(acc)

	case l <= 128:
		acc = u64(l) * prime64_1

		// Fold symmetric 16-byte pairs from both ends, innermost pairs
		// only for longer inputs (ladder unlocks at 32/64/96 bytes).
		if l > 32 {
			if l > 64 {
				if l > 96 {
					acc += mulFold64(readU64(p, 6*8)^key64_096, readU64(p, 7*8)^key64_104)
					acc += mulFold64(readU64(p, ui(l)-8*8)^key64_112, readU64(p, ui(l)-7*8)^key64_120)
				} // 96
				acc += mulFold64(readU64(p, 4*8)^key64_064, readU64(p, 5*8)^key64_072)
				acc += mulFold64(readU64(p, ui(l)-6*8)^key64_080, readU64(p, ui(l)-5*8)^key64_088)
			} // 64
			acc += mulFold64(readU64(p, 2*8)^key64_032, readU64(p, 3*8)^key64_040)
			acc += mulFold64(readU64(p, ui(l)-4*8)^key64_048, readU64(p, ui(l)-3*8)^key64_056)
		} // 32
		acc += mulFold64(readU64(p, 0*8)^key64_000, readU64(p, 1*8)^key64_008)
		acc += mulFold64(readU64(p, ui(l)-2*8)^key64_016, readU64(p, ui(l)-1*8)^key64_024)

		return xxh3Avalanche(acc)

	case l <= 240:
		acc = u64(l) * prime64_1

		// First 128 bytes: eight fixed 16-byte folds.
		acc += mulFold64(readU64(p, 0*16+0)^key64_000, readU64(p, 0*16+8)^key64_008)
		acc += mulFold64(readU64(p, 1*16+0)^key64_016, readU64(p, 1*16+8)^key64_024)
		acc += mulFold64(readU64(p, 2*16+0)^key64_032, readU64(p, 2*16+8)^key64_040)
		acc += mulFold64(readU64(p, 3*16+0)^key64_048, readU64(p, 3*16+8)^key64_056)
		acc += mulFold64(readU64(p, 4*16+0)^key64_064, readU64(p, 4*16+8)^key64_072)
		acc += mulFold64(readU64(p, 5*16+0)^key64_080, readU64(p, 5*16+8)^key64_088)
		acc += mulFold64(readU64(p, 6*16+0)^key64_096, readU64(p, 6*16+8)^key64_104)
		acc += mulFold64(readU64(p, 7*16+0)^key64_112, readU64(p, 7*16+8)^key64_120)

		// avalanche
		acc = xxh3Avalanche(acc)

		// trailing groups after 128
		top := ui(l) &^ 15
		for i := ui(8 * 16); i < top; i += 16 {
			acc += mulFold64(readU64(p, i+0)^readU64(key, i-125), readU64(p, i+8)^readU64(key, i-117))
		}

		// last 16 bytes
		acc += mulFold64(readU64(p, ui(l)-16)^key64_119, readU64(p, ui(l)-8)^key64_127)

		return xxh3Avalanche(acc)

	default:
		acc = u64(l) * prime64_1

		// Standard xxh3 starting accumulator lanes.
		accs := [8]u64{
			prime32_3, prime64_1, prime64_2, prime64_3,
			prime64_4, prime32_2, prime64_5, prime32_1,
		}

		// Pick the fastest available accumulation kernel.
		if hasAVX512 && l >= avx512Switch {
			accumAVX512(&accs, p, key, u64(l))
		} else if hasAVX2 {
			accumAVX2(&accs, p, key, u64(l))
		} else if hasSSE2 {
			accumSSE(&accs, p, key, u64(l))
		} else {
			accumScalar(&accs, p, key, u64(l))
		}

		// merge accs
		acc += mulFold64(accs[0]^key64_011, accs[1]^key64_019)
		acc += mulFold64(accs[2]^key64_027, accs[3]^key64_035)
		acc += mulFold64(accs[4]^key64_043, accs[5]^key64_051)
		acc += mulFold64(accs[6]^key64_059, accs[7]^key64_067)

		return xxh3Avalanche(acc)
	}
}
|
134
vendor/github.com/zeebo/xxh3/hash64_seed.go
generated
vendored
134
vendor/github.com/zeebo/xxh3/hash64_seed.go
generated
vendored
@ -1,134 +0,0 @@
|
||||
package xxh3
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// HashSeed returns the 64-bit xxh3 hash of the byte slice with the
// given seed.
func HashSeed(b []byte, seed uint64) uint64 {
	// Reinterpret *[]byte as *str to hash without copying.
	return hashAnySeed(*(*str)(ptr(&b)), seed)

}
|
||||
|
||||
// HashStringSeed returns the 64-bit xxh3 hash of the string s with the
// given seed.
func HashStringSeed(s string, seed uint64) uint64 {
	// Reinterpret *string as *str to hash without copying.
	return hashAnySeed(*(*str)(ptr(&s)), seed)
}
|
||||
|
||||
// hashAnySeed is the seeded counterpart of hashAny: the same per-length
// dispatch, with the seed added to the low and subtracted from the high
// word of each key pair (and, for >240 bytes, a full derived secret).
func hashAnySeed(s str, seed uint64) (acc u64) {
	p, l := s.p, s.l

	switch {
	case l <= 16:
		switch {
		case l > 8: // 9-16: fold both (overlapping) 8-byte halves.
			inputlo := readU64(p, 0) ^ (key64_024 ^ key64_032 + seed)
			inputhi := readU64(p, ui(l)-8) ^ (key64_040 ^ key64_048 - seed)
			folded := mulFold64(inputlo, inputhi)
			return xxh3Avalanche(u64(l) + bits.ReverseBytes64(inputlo) + inputhi + folded)

		case l > 3: // 4-8: mix a byte-swapped copy of the seed's low half in first.
			seed ^= u64(bits.ReverseBytes32(u32(seed))) << 32
			input1 := readU32(p, 0)
			input2 := readU32(p, ui(l)-4)
			input64 := u64(input2) + u64(input1)<<32
			keyed := input64 ^ (key64_008 ^ key64_016 - seed)
			return rrmxmx(keyed, u64(l))

		case l == 3: // 3
			c12 := u64(readU16(p, 0))
			c3 := u64(readU8(p, 2))
			acc = c12<<16 + c3 + 3<<8

		case l > 1: // 2
			c12 := u64(readU16(p, 0))
			acc = c12*(1<<24+1)>>8 + 2<<8

		case l == 1: // 1
			c1 := u64(readU8(p, 0))
			acc = c1*(1<<24+1<<16+1) + 1<<8

		default: // 0: empty input still depends on the seed.
			return xxhAvalancheSmall(seed ^ key64_056 ^ key64_064)
		}

		// Shared finalization for the 1-3 byte cases above.
		acc ^= u64(key32_000^key32_004) + seed
		return xxhAvalancheSmall(acc)

	case l <= 128:
		acc = u64(l) * prime64_1

		// Symmetric 16-byte folds from both ends; ladder unlocks at
		// 32/64/96 bytes, seed folded into every key word.
		if l > 32 {
			if l > 64 {
				if l > 96 {
					acc += mulFold64(readU64(p, 6*8)^(key64_096+seed), readU64(p, 7*8)^(key64_104-seed))
					acc += mulFold64(readU64(p, ui(l)-8*8)^(key64_112+seed), readU64(p, ui(l)-7*8)^(key64_120-seed))
				} // 96
				acc += mulFold64(readU64(p, 4*8)^(key64_064+seed), readU64(p, 5*8)^(key64_072-seed))
				acc += mulFold64(readU64(p, ui(l)-6*8)^(key64_080+seed), readU64(p, ui(l)-5*8)^(key64_088-seed))
			} // 64
			acc += mulFold64(readU64(p, 2*8)^(key64_032+seed), readU64(p, 3*8)^(key64_040-seed))
			acc += mulFold64(readU64(p, ui(l)-4*8)^(key64_048+seed), readU64(p, ui(l)-3*8)^(key64_056-seed))
		} // 32
		acc += mulFold64(readU64(p, 0*8)^(key64_000+seed), readU64(p, 1*8)^(key64_008-seed))
		acc += mulFold64(readU64(p, ui(l)-2*8)^(key64_016+seed), readU64(p, ui(l)-1*8)^(key64_024-seed))

		return xxh3Avalanche(acc)

	case l <= 240:
		acc = u64(l) * prime64_1

		// First 128 bytes: eight fixed 16-byte folds, seeded keys.
		acc += mulFold64(readU64(p, 0*16+0)^(key64_000+seed), readU64(p, 0*16+8)^(key64_008-seed))
		acc += mulFold64(readU64(p, 1*16+0)^(key64_016+seed), readU64(p, 1*16+8)^(key64_024-seed))
		acc += mulFold64(readU64(p, 2*16+0)^(key64_032+seed), readU64(p, 2*16+8)^(key64_040-seed))
		acc += mulFold64(readU64(p, 3*16+0)^(key64_048+seed), readU64(p, 3*16+8)^(key64_056-seed))
		acc += mulFold64(readU64(p, 4*16+0)^(key64_064+seed), readU64(p, 4*16+8)^(key64_072-seed))
		acc += mulFold64(readU64(p, 5*16+0)^(key64_080+seed), readU64(p, 5*16+8)^(key64_088-seed))
		acc += mulFold64(readU64(p, 6*16+0)^(key64_096+seed), readU64(p, 6*16+8)^(key64_104-seed))
		acc += mulFold64(readU64(p, 7*16+0)^(key64_112+seed), readU64(p, 7*16+8)^(key64_120-seed))

		// avalanche
		acc = xxh3Avalanche(acc)

		// trailing groups after 128
		top := ui(l) &^ 15
		for i := ui(8 * 16); i < top; i += 16 {
			acc += mulFold64(readU64(p, i+0)^(readU64(key, i-125)+seed), readU64(p, i+8)^(readU64(key, i-117)-seed))
		}

		// last 16 bytes
		acc += mulFold64(readU64(p, ui(l)-16)^(key64_119+seed), readU64(p, ui(l)-8)^(key64_127-seed))

		return xxh3Avalanche(acc)

	default:
		acc = u64(l) * prime64_1

		// A non-zero seed needs a full derived secret for the block
		// accumulators; seed 0 reuses the default key directly.
		secret := key
		if seed != 0 {
			secret = ptr(&[secretSize]byte{})
			initSecret(secret, seed)
		}

		// Standard xxh3 starting accumulator lanes.
		accs := [8]u64{
			prime32_3, prime64_1, prime64_2, prime64_3,
			prime64_4, prime32_2, prime64_5, prime32_1,
		}

		// Pick the fastest available accumulation kernel.
		if hasAVX512 && l >= avx512Switch {
			accumAVX512(&accs, p, secret, u64(l))
		} else if hasAVX2 {
			accumAVX2(&accs, p, secret, u64(l))
		} else if hasSSE2 {
			accumSSE(&accs, p, secret, u64(l))
		} else {
			accumScalarSeed(&accs, p, secret, u64(l))
		}

		// merge accs
		acc += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19))
		acc += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35))
		acc += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51))
		acc += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67))

		return xxh3Avalanche(acc)
	}
}
|
239
vendor/github.com/zeebo/xxh3/hasher.go
generated
vendored
239
vendor/github.com/zeebo/xxh3/hasher.go
generated
vendored
@ -1,239 +0,0 @@
|
||||
package xxh3
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"hash"
|
||||
)
|
||||
|
||||
// Hasher implements the hash.Hash interface for streaming xxh3.
type Hasher struct {
	acc  [8]u64                 // running accumulator lanes
	blk  u64                    // number of full blocks consumed so far
	len  u64                    // bytes currently staged in buf
	key  ptr                    // secret in use: default key, or seed-derived copy
	buf  [_block + _stripe]byte // staging buffer: one block plus one stripe of overlap
	seed u64                    // seed supplied to NewSeed (0 for New)
}

// Compile-time checks that *Hasher satisfies the stdlib hash interfaces.
var (
	_ hash.Hash   = (*Hasher)(nil)
	_ hash.Hash64 = (*Hasher)(nil)
)
|
||||
|
||||
// New returns a new Hasher that implements the hash.Hash interface.
|
||||
func New() *Hasher {
|
||||
return new(Hasher)
|
||||
}
|
||||
|
||||
// NewSeed returns a new Hasher that implements the hash.Hash interface,
// seeding the hash with the given value. A non-zero seed derives a
// dedicated secret once, up front.
func NewSeed(seed uint64) *Hasher {
	var h Hasher
	h.Reset() // initialize accumulators before recording seed/key
	h.seed = seed
	h.key = key

	// Only initiate once, not on reset.
	if seed != 0 {
		h.key = ptr(&[secretSize]byte{})
		initSecret(h.key, seed)
	}
	return &h
}
|
||||
|
||||
// Reset resets the Hash to its initial state.
// NOTE(review): key and seed are deliberately preserved so a seeded
// Hasher stays seeded across Reset (see NewSeed: "Only initiate once,
// not on reset").
func (h *Hasher) Reset() {
	// Standard xxh3 starting accumulator lanes.
	h.acc = [8]u64{
		prime32_3, prime64_1, prime64_2, prime64_3,
		prime64_4, prime32_2, prime64_5, prime32_1,
	}
	h.blk = 0
	h.len = 0
}
|
||||
|
||||
// BlockSize returns the hash's underlying block size.
// The Write method will accept any amount of data, but
// it may operate more efficiently if all writes are a
// multiple of the block size.
func (h *Hasher) BlockSize() int { return _stripe }
|
||||
|
||||
// Size returns the number of bytes Sum will return (8 for a 64-bit hash).
func (h *Hasher) Size() int { return 8 }
|
||||
|
||||
// Sum appends the current hash to b and returns the resulting slice.
|
||||
// It does not change the underlying hash state.
|
||||
func (h *Hasher) Sum(b []byte) []byte {
|
||||
var tmp [8]byte
|
||||
binary.BigEndian.PutUint64(tmp[:], h.Sum64())
|
||||
return append(b, tmp[:]...)
|
||||
}
|
||||
|
||||
// Write adds more data to the running hash.
// It never returns an error.
func (h *Hasher) Write(buf []byte) (int, error) {
	h.update(buf)
	return len(buf), nil
}
|
||||
|
||||
// WriteString adds more data to the running hash.
// It never returns an error.
func (h *Hasher) WriteString(buf string) (int, error) {
	h.updateString(buf)
	return len(buf), nil
}
|
||||
|
||||
// update feeds a byte slice into the running hash by viewing it as a
// string header.
func (h *Hasher) update(buf []byte) {
	// relies on the data pointer being the first word in the string header
	h.updateString(*(*string)(ptr(&buf)))
}
|
||||
|
||||
// updateString consumes buf into the hash state: whole blocks are
// accumulated directly when possible, the remainder is staged in h.buf.
func (h *Hasher) updateString(buf string) {
	// Zero-value Hasher: adopt the default key and initialize lanes.
	if h.key == nil {
		h.key = key
		h.Reset()
	}

	// On first write, if more than 1 block, process without copy.
	for h.len == 0 && len(buf) > len(h.buf) {
		if hasAVX2 {
			accumBlockAVX2(&h.acc, *(*ptr)(ptr(&buf)), h.key)
		} else if hasSSE2 {
			accumBlockSSE(&h.acc, *(*ptr)(ptr(&buf)), h.key)
		} else {
			accumBlockScalar(&h.acc, *(*ptr)(ptr(&buf)), h.key)
		}
		buf = buf[_block:]
		h.blk++
	}

	for len(buf) > 0 {
		// Stage input into buf until the buffer is full.
		if h.len < u64(len(h.buf)) {
			n := copy(h.buf[h.len:], buf)
			h.len += u64(n)
			buf = buf[n:]
			continue
		}

		// buf holds a full block plus one stripe: consume the block.
		if hasAVX2 {
			accumBlockAVX2(&h.acc, ptr(&h.buf), h.key)
		} else if hasSSE2 {
			accumBlockSSE(&h.acc, ptr(&h.buf), h.key)
		} else {
			accumBlockScalar(&h.acc, ptr(&h.buf), h.key)
		}

		// Keep the trailing stripe at the front of buf — finalization
		// needs the last stripe of input available.
		h.blk++
		h.len = _stripe
		copy(h.buf[:_stripe], h.buf[_block:])
	}
}
|
||||
|
||||
// Sum64 returns the 64-bit hash of the written data.
// It does not modify the hash state, so more data may be written after.
func (h *Hasher) Sum64() uint64 {
	// Zero-value Hasher: adopt the default key and initialize lanes.
	if h.key == nil {
		h.key = key
		h.Reset()
	}

	// No full block consumed yet: all input is still in buf, so defer
	// to the one-shot hash (seeded or unseeded).
	if h.blk == 0 {
		if h.seed == 0 {
			return Hash(h.buf[:h.len])
		}
		return HashSeed(h.buf[:h.len], h.seed)
	}

	// Work on copies so the running state is left untouched.
	l := h.blk*_block + h.len
	acc := l * prime64_1
	accs := h.acc

	if h.len > 0 {
		// We are only ever doing 1 block here, so no avx512.
		if hasAVX2 {
			accumAVX2(&accs, ptr(&h.buf[0]), h.key, h.len)
		} else if hasSSE2 {
			accumSSE(&accs, ptr(&h.buf[0]), h.key, h.len)
		} else {
			accumScalar(&accs, ptr(&h.buf[0]), h.key, h.len)
		}
	}

	// Merge the 8 lanes: fixed key constants for the default key,
	// reads from the derived secret when seeded.
	if h.seed == 0 {
		acc += mulFold64(accs[0]^key64_011, accs[1]^key64_019)
		acc += mulFold64(accs[2]^key64_027, accs[3]^key64_035)
		acc += mulFold64(accs[4]^key64_043, accs[5]^key64_051)
		acc += mulFold64(accs[6]^key64_059, accs[7]^key64_067)
	} else {
		secret := h.key
		acc += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19))
		acc += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35))
		acc += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51))
		acc += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67))
	}

	acc = xxh3Avalanche(acc)

	return acc
}
|
||||
|
||||
// Sum128 returns the 128-bit hash of the written data.
// It does not modify the hash state, so more data may be written after.
func (h *Hasher) Sum128() Uint128 {
	// Zero-value Hasher: adopt the default key and initialize lanes.
	if h.key == nil {
		h.key = key
		h.Reset()
	}

	// No full block consumed yet: all input is still in buf, so defer
	// to the one-shot 128-bit hash (seeded or unseeded).
	if h.blk == 0 {
		if h.seed == 0 {
			return Hash128(h.buf[:h.len])
		}
		return Hash128Seed(h.buf[:h.len], h.seed)
	}

	// Work on copies so the running state is left untouched.
	l := h.blk*_block + h.len
	acc := Uint128{Lo: l * prime64_1, Hi: ^(l * prime64_2)}
	accs := h.acc

	if h.len > 0 {
		// We are only ever doing 1 block here, so no avx512.
		if hasAVX2 {
			accumAVX2(&accs, ptr(&h.buf[0]), h.key, h.len)
		} else if hasSSE2 {
			accumSSE(&accs, ptr(&h.buf[0]), h.key, h.len)
		} else {
			accumScalar(&accs, ptr(&h.buf[0]), h.key, h.len)
		}
	}

	// Merge the 8 lanes twice: low half from the front of the key,
	// high half from offset 117-11 further in (hi_off in the seeded
	// branch below).
	if h.seed == 0 {
		acc.Lo += mulFold64(accs[0]^key64_011, accs[1]^key64_019)
		acc.Hi += mulFold64(accs[0]^key64_117, accs[1]^key64_125)

		acc.Lo += mulFold64(accs[2]^key64_027, accs[3]^key64_035)
		acc.Hi += mulFold64(accs[2]^key64_133, accs[3]^key64_141)

		acc.Lo += mulFold64(accs[4]^key64_043, accs[5]^key64_051)
		acc.Hi += mulFold64(accs[4]^key64_149, accs[5]^key64_157)

		acc.Lo += mulFold64(accs[6]^key64_059, accs[7]^key64_067)
		acc.Hi += mulFold64(accs[6]^key64_165, accs[7]^key64_173)
	} else {
		secret := h.key
		const hi_off = 117 - 11

		acc.Lo += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19))
		acc.Hi += mulFold64(accs[0]^readU64(secret, 11+hi_off), accs[1]^readU64(secret, 19+hi_off))

		acc.Lo += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35))
		acc.Hi += mulFold64(accs[2]^readU64(secret, 27+hi_off), accs[3]^readU64(secret, 35+hi_off))

		acc.Lo += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51))
		acc.Hi += mulFold64(accs[4]^readU64(secret, 43+hi_off), accs[5]^readU64(secret, 51+hi_off))

		acc.Lo += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67))
		acc.Hi += mulFold64(accs[6]^readU64(secret, 59+hi_off), accs[7]^readU64(secret, 67+hi_off))
	}

	acc.Lo = xxh3Avalanche(acc.Lo)
	acc.Hi = xxh3Avalanche(acc.Hi)

	return acc
}
|
129
vendor/github.com/zeebo/xxh3/utils.go
generated
vendored
129
vendor/github.com/zeebo/xxh3/utils.go
generated
vendored
@ -1,129 +0,0 @@
|
||||
package xxh3
|
||||
|
||||
import (
|
||||
"math/bits"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Uint128 is a 128 bit value.
// The actual value can be thought of as u.Hi<<64 | u.Lo.
type Uint128 struct {
	Hi, Lo uint64
}

// Bytes returns the uint128 as an array of bytes in canonical form
// (big-endian encoded): Hi occupies indices 0-7, Lo indices 8-15.
func (u Uint128) Bytes() [16]byte {
	var out [16]byte
	for i := uint(0); i < 8; i++ {
		shift := 56 - 8*i
		out[i] = byte(u.Hi >> shift)
		out[8+i] = byte(u.Lo >> shift)
	}
	return out
}
|
||||
|
||||
// Short aliases used throughout the package to keep the hash code terse.
type (
	ptr = unsafe.Pointer
	ui  = uintptr

	u8   = uint8
	u32  = uint32
	u64  = uint64
	u128 = Uint128
)

// str mirrors the leading words of string/slice headers: data pointer
// first, then length. Hash entry points cast *string / *[]byte to *str
// to reach both without copying (relies on the data pointer being the
// first word in those headers).
type str struct {
	p ptr
	l uint
}
|
||||
|
||||
// readU8 loads the byte at offset o from p.
func readU8(p ptr, o ui) uint8 {
	return *(*uint8)(ptr(ui(p) + o))
}
|
||||
|
||||
// readU16 loads a little-endian uint16 at offset o from p.
func readU16(p ptr, o ui) uint16 {
	b := (*[2]byte)(ptr(ui(p) + o))
	return uint16(b[0]) | uint16(b[1])<<8
}
|
||||
|
||||
// readU32 loads a little-endian uint32 at offset o from p.
func readU32(p ptr, o ui) uint32 {
	b := (*[4]byte)(ptr(ui(p) + o))
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
|
||||
|
||||
// readU64 loads a little-endian uint64 at offset o from p.
func readU64(p ptr, o ui) uint64 {
	b := (*[8]byte)(ptr(ui(p) + o))
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
|
||||
|
||||
// writeU64 stores v little-endian at offset o from p.
func writeU64(p ptr, o ui, v u64) {
	b := (*[8]byte)(ptr(ui(p) + o))
	b[0] = byte(v)
	b[1] = byte(v >> 8)
	b[2] = byte(v >> 16)
	b[3] = byte(v >> 24)
	b[4] = byte(v >> 32)
	b[5] = byte(v >> 40)
	b[6] = byte(v >> 48)
	b[7] = byte(v >> 56)
}
|
||||
|
||||
// secretSize is the byte length of the hash secret (the default key and
// any seed-derived copy).
const secretSize = 192

// initSecret derives a seeded secret from the default key: for each
// 16-byte group, the seed is added to the low 8-byte word and
// subtracted from the high one.
func initSecret(secret ptr, seed u64) {
	for i := ui(0); i < secretSize/16; i++ {
		lo := readU64(key, 16*i) + seed
		hi := readU64(key, 16*i+8) - seed
		writeU64(secret, 16*i, lo)
		writeU64(secret, 16*i+8, hi)
	}
}
|
||||
|
||||
// xxh64AvalancheSmall is the xxh64 finalization mix with the first
// xor-shift and key xor elided — the caller is expected to have done
// them (see the inline notes below).
func xxh64AvalancheSmall(x u64) u64 {
	// x ^= x >> 33 // x must be < 32 bits
	// x ^= u64(key32_000 ^ key32_004) // caller must do this
	x *= prime64_2
	x ^= x >> 29
	x *= prime64_3
	x ^= x >> 32
	return x
}
|
||||
|
||||
// xxhAvalancheSmall applies the full xxh64-style finalization mix:
// alternating xor-shifts and prime multiplications.
func xxhAvalancheSmall(x u64) u64 {
	x ^= x >> 33
	x *= prime64_2
	x ^= x >> 29
	x *= prime64_3
	x ^= x >> 32
	return x
}
|
||||
|
||||
// xxh64AvalancheFull applies the full xxh64 finalization mix.
// NOTE(review): body is identical to xxhAvalancheSmall above; kept as a
// separate name upstream — presumably for call-site clarity; confirm
// before consolidating.
func xxh64AvalancheFull(x u64) u64 {
	x ^= x >> 33
	x *= prime64_2
	x ^= x >> 29
	x *= prime64_3
	x ^= x >> 32
	return x
}
|
||||
|
||||
// xxh3Avalanche applies the xxh3 finalization mix: xor-shift, multiply
// by a fixed odd constant, xor-shift.
func xxh3Avalanche(x u64) u64 {
	x ^= x >> 37
	x *= 0x165667919e3779f9
	x ^= x >> 32
	return x
}
|
||||
|
||||
// rrmxmx is the finalizer used for 4-8 byte inputs: two rotations
// xored together, multiply, shift-xor with the input length folded in,
// multiply, final shift-xor.
func rrmxmx(h64 u64, len u64) u64 {
	h64 ^= bits.RotateLeft64(h64, 49) ^ bits.RotateLeft64(h64, 24)
	h64 *= 0x9fb21c651e98df25
	h64 ^= (h64 >> 35) + len
	h64 *= 0x9fb21c651e98df25
	h64 ^= (h64 >> 28)
	return h64
}
|
||||
|
||||
func mulFold64(x, y u64) u64 {
|
||||
hi, lo := bits.Mul64(x, y)
|
||||
return hi ^ lo
|
||||
}
|
Reference in New Issue
Block a user