[chore] update dependencies, bump to Go 1.19.1 (#826)

* update dependencies, bump Go version to 1.19

* bump test image Go version

* update golangci-lint

* update gotosocial-drone-build

* sign

* linting, go fmt

* update swagger docs

* update swagger docs

* whitespace

* update contributing.md

* fuckin whoopsie doopsie

* linterino, linteroni

* fix followrequest test not starting processor

* fix other api/client tests not starting processor

* fix remaining tests where processor not started

* bump go-runners version

* don't check last-webfingered-at, processor may have updated this

* update swagger command

* update bun to latest version

* fix embed to work the same as before with new bun

Signed-off-by: kim <grufwub@gmail.com>
Co-authored-by: tsmethurst <tobi.smethurst@protonmail.com>
This commit is contained in:
kim
2022-09-28 18:30:40 +01:00
committed by GitHub
parent 00d38855d4
commit a156188b3e
1135 changed files with 258905 additions and 137146 deletions

View File

@ -19,6 +19,7 @@ This is important, so you don't have to worry about spending CPU cycles on alrea
* Adjustable compression (3 levels)
* Concurrent stream compression
* Faster decompression, even for Snappy compatible content
* Concurrent Snappy/S2 stream decompression
* Ability to quickly skip forward in compressed stream
* Random seeking with indexes
* Compatible with reading Snappy compressed content
@ -415,6 +416,25 @@ Without assembly decompression is also very fast; single goroutine decompression
Even though S2 typically compresses better than Snappy, decompression speed is always better.
### Concurrent Stream Decompression
For full stream decompression S2 offers a [DecodeConcurrent](https://pkg.go.dev/github.com/klauspost/compress/s2#Reader.DecodeConcurrent)
that will decode a full stream using multiple goroutines.
Example scaling, AMD Ryzen 3950X, 16 cores, decompression using `s2d -bench=3 <input>`, best of 3:
| Input | `-cpu=1` | `-cpu=2` | `-cpu=4` | `-cpu=8` | `-cpu=16` |
|-------------------------------------------|------------|------------|------------|------------|-------------|
| enwik10.snappy | 1098.6MB/s | 1819.8MB/s | 3625.6MB/s | 6910.6MB/s | 10818.2MB/s |
| enwik10.s2 | 1303.5MB/s | 2606.1MB/s | 4847.9MB/s | 8878.4MB/s | 9592.1MB/s |
| sofia-air-quality-dataset.tar.snappy | 1302.0MB/s | 2165.0MB/s | 4244.5MB/s | 8241.0MB/s | 12920.5MB/s |
| sofia-air-quality-dataset.tar.s2 | 1399.2MB/s | 2463.2MB/s | 5196.5MB/s | 9639.8MB/s | 11439.5MB/s |
| sofia-air-quality-dataset.tar.s2 (no asm) | 837.5MB/s | 1652.6MB/s | 3183.6MB/s | 5945.0MB/s | 9620.7MB/s |
Scaling can be expected to be pretty linear until memory bandwidth is saturated.
For now the DecodeConcurrent can only be used for full streams without seeking or combining with regular reads.
## Block compression
@ -873,7 +893,7 @@ for each entry {
}
// Uncompressed uses previous offset and adds EstBlockSize
entry[entryNum].UncompressedOffset = entry[entryNum-1].UncompressedOffset + EstBlockSize
entry[entryNum].UncompressedOffset = entry[entryNum-1].UncompressedOffset + EstBlockSize + uOff
}
@ -901,6 +921,14 @@ for each entry {
}
```
To decode from any given uncompressed offset `(wantOffset)`:
* Iterate entries until `entry[n].UncompressedOffset > wantOffset`.
* Start decoding from `entry[n-1].CompressedOffset`.
* Discard `wantOffset - entry[n-1].UncompressedOffset` bytes from the decoded stream.
See [using indexes](https://github.com/klauspost/compress/tree/master/s2#using-indexes) for functions that perform the operations with a simpler interface.
# Format Extensions
* Frame [Stream identifier](https://github.com/google/snappy/blob/master/framing_format.txt#L68) changed from `sNaPpY` to `S2sTwO`.

View File

@ -11,6 +11,9 @@ import (
"fmt"
"io"
"io/ioutil"
"math"
"runtime"
"sync"
)
var (
@ -169,6 +172,14 @@ func ReaderSkippableCB(id uint8, fn func(r io.Reader) error) ReaderOption {
}
}
// ReaderIgnoreCRC returns a ReaderOption that sets the reader's ignoreCRC
// flag, so CRC values are neither calculated nor compared while decoding.
func ReaderIgnoreCRC() ReaderOption {
	disableCRC := func(rd *Reader) error {
		rd.ignoreCRC = true
		return nil
	}
	return disableCRC
}
// Reader is an io.Reader that can read Snappy-compressed bytes.
type Reader struct {
r io.Reader
@ -191,18 +202,19 @@ type Reader struct {
paramsOK bool
snappyFrame bool
ignoreStreamID bool
ignoreCRC bool
}
// ensureBufferSize will ensure that the buffer can take at least n bytes.
// If false is returned the buffer exceeds maximum allowed size.
func (r *Reader) ensureBufferSize(n int) bool {
if len(r.buf) >= n {
return true
}
if n > r.maxBufSize {
r.err = ErrCorrupt
return false
}
if cap(r.buf) >= n {
return true
}
// Realloc buffer.
r.buf = make([]byte, n)
return true
@ -220,6 +232,7 @@ func (r *Reader) Reset(reader io.Reader) {
r.err = nil
r.i = 0
r.j = 0
r.blockStart = 0
r.readHeader = r.ignoreStreamID
}
@ -344,7 +357,7 @@ func (r *Reader) Read(p []byte) (int, error) {
r.err = err
return 0, r.err
}
if crc(r.decoded[:n]) != checksum {
if !r.ignoreCRC && crc(r.decoded[:n]) != checksum {
r.err = ErrCRC
return 0, r.err
}
@ -385,7 +398,7 @@ func (r *Reader) Read(p []byte) (int, error) {
if !r.readFull(r.decoded[:n], false) {
return 0, r.err
}
if crc(r.decoded[:n]) != checksum {
if !r.ignoreCRC && crc(r.decoded[:n]) != checksum {
r.err = ErrCRC
return 0, r.err
}
@ -435,6 +448,259 @@ func (r *Reader) Read(p []byte) (int, error) {
}
}
// DecodeConcurrent will decode the full stream to w.
// This function should not be combined with reading, seeking or other operations.
// Up to 'concurrent' goroutines will be used.
// If <= 0, runtime.NumCPU will be used.
// On success the number of bytes decompressed and a nil error is returned.
// This is mainly intended for bigger streams.
//
// Blocks are decoded in parallel but written to w in stream order by a single
// writer goroutine. The first error encountered (read, decode, CRC or write)
// is returned after all goroutines have finished.
func (r *Reader) DecodeConcurrent(w io.Writer, concurrent int) (written int64, err error) {
	if r.i > 0 || r.j > 0 || r.blockStart > 0 {
		return 0, errors.New("DecodeConcurrent called after reading or skipping data")
	}
	if concurrent <= 0 {
		concurrent = runtime.NumCPU()
	}

	// Shared error state for the decoder and writer goroutines.
	var errMu sync.Mutex
	var aErr error
	// setErr records the first non-nil error. Called with nil it only
	// reports whether no error has been recorded so far.
	setErr := func(e error) (ok bool) {
		errMu.Lock()
		defer errMu.Unlock()
		if e == nil {
			return aErr == nil
		}
		if aErr == nil {
			aErr = e
		}
		return false
	}
	// hasErr reports whether an error has been recorded.
	hasErr := func() (ok bool) {
		errMu.Lock()
		v := aErr != nil
		errMu.Unlock()
		return v
	}

	var aWritten int64
	// toRead holds reusable buffers for compressed input.
	toRead := make(chan []byte, concurrent)
	// writtenBlocks holds reusable buffers for decompressed output.
	writtenBlocks := make(chan []byte, concurrent)
	// queue carries per-block result channels in stream order.
	queue := make(chan chan []byte, concurrent)
	// reUse recycles the per-block result channels.
	reUse := make(chan chan []byte, concurrent)
	for i := 0; i < concurrent; i++ {
		toRead <- make([]byte, 0, r.maxBufSize)
		writtenBlocks <- make([]byte, 0, r.maxBufSize)
		reUse <- make(chan []byte, 1)
	}

	// Writer
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		for toWrite := range queue {
			entry := <-toWrite
			reUse <- toWrite
			if entry == nil {
				// The decoder failed; it has already recorded the error
				// and returned its output buffer.
				continue
			}
			if hasErr() {
				writtenBlocks <- entry
				continue
			}
			n, err := w.Write(entry)
			want := len(entry)
			writtenBlocks <- entry
			if err != nil {
				setErr(err)
				continue
			}
			if n != want {
				setErr(io.ErrShortWrite)
				continue
			}
			aWritten += int64(n)
		}
	}()

	// Reader
	defer func() {
		// Signal the writer that no more blocks will be queued, then wait
		// for it and all decoders before publishing the results.
		close(queue)
		if r.err != nil {
			err = r.err
			setErr(r.err)
		}
		wg.Wait()
		if err == nil {
			err = aErr
		}
		written = aWritten
	}()

	for !hasErr() {
		if !r.readFull(r.buf[:4], true) {
			if r.err == io.EOF {
				// A clean EOF at a chunk boundary ends the stream.
				r.err = nil
			}
			return 0, r.err
		}
		chunkType := r.buf[0]
		if !r.readHeader {
			if chunkType != chunkTypeStreamIdentifier {
				r.err = ErrCorrupt
				return 0, r.err
			}
			r.readHeader = true
		}
		chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16

		// The chunk types are specified at
		// https://github.com/google/snappy/blob/master/framing_format.txt
		switch chunkType {
		case chunkTypeCompressedData:
			r.blockStart += int64(r.j)
			// Section 4.2. Compressed data (chunk type 0x00).
			if chunkLen < checksumSize {
				r.err = ErrCorrupt
				return 0, r.err
			}
			if chunkLen > r.maxBufSize {
				r.err = ErrCorrupt
				return 0, r.err
			}
			orgBuf := <-toRead
			buf := orgBuf[:chunkLen]

			if !r.readFull(buf, false) {
				return 0, r.err
			}

			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
			buf = buf[checksumSize:]

			n, err := DecodedLen(buf)
			if err != nil {
				r.err = err
				return 0, r.err
			}
			if r.snappyFrame && n > maxSnappyBlockSize {
				r.err = ErrCorrupt
				return 0, r.err
			}

			if n > r.maxBlock {
				r.err = ErrCorrupt
				return 0, r.err
			}
			wg.Add(1)

			decoded := <-writtenBlocks
			entry := <-reUse
			// Queue the result channel before decoding so output order
			// follows input order.
			queue <- entry
			go func() {
				defer wg.Done()
				decoded = decoded[:n]
				_, err := Decode(decoded, buf)
				toRead <- orgBuf
				if err != nil {
					writtenBlocks <- decoded
					setErr(err)
					// Always answer on the queued channel, otherwise the
					// writer would block forever waiting for this block.
					entry <- nil
					return
				}
				if !r.ignoreCRC && crc(decoded) != checksum {
					writtenBlocks <- decoded
					setErr(ErrCRC)
					// See above: unblock the writer.
					entry <- nil
					return
				}
				entry <- decoded
			}()
			continue

		case chunkTypeUncompressedData:

			// Section 4.3. Uncompressed data (chunk type 0x01).
			if chunkLen < checksumSize {
				r.err = ErrCorrupt
				return 0, r.err
			}
			if chunkLen > r.maxBufSize {
				r.err = ErrCorrupt
				return 0, r.err
			}
			// Grab write buffer
			orgBuf := <-writtenBlocks
			buf := orgBuf[:checksumSize]
			if !r.readFull(buf, false) {
				return 0, r.err
			}

			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
			// Read content.
			n := chunkLen - checksumSize

			if r.snappyFrame && n > maxSnappyBlockSize {
				r.err = ErrCorrupt
				return 0, r.err
			}
			if n > r.maxBlock {
				r.err = ErrCorrupt
				return 0, r.err
			}
			// Read uncompressed
			buf = orgBuf[:n]
			if !r.readFull(buf, false) {
				return 0, r.err
			}

			if !r.ignoreCRC && crc(buf) != checksum {
				r.err = ErrCRC
				return 0, r.err
			}
			// No decoding needed; hand the block straight to the writer.
			entry := <-reUse
			queue <- entry
			entry <- buf
			continue

		case chunkTypeStreamIdentifier:
			// Section 4.1. Stream identifier (chunk type 0xff).
			if chunkLen != len(magicBody) {
				r.err = ErrCorrupt
				return 0, r.err
			}
			if !r.readFull(r.buf[:len(magicBody)], false) {
				return 0, r.err
			}
			if string(r.buf[:len(magicBody)]) != magicBody {
				if string(r.buf[:len(magicBody)]) != magicBodySnappy {
					r.err = ErrCorrupt
					return 0, r.err
				} else {
					r.snappyFrame = true
				}
			} else {
				r.snappyFrame = false
			}
			continue
		}

		if chunkType <= 0x7f {
			// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
			// fmt.Printf("ERR chunktype: 0x%x\n", chunkType)
			r.err = ErrUnsupported
			return 0, r.err
		}
		// Section 4.4 Padding (chunk type 0xfe).
		// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
		if chunkLen > maxChunkSize {
			// fmt.Printf("ERR chunkLen: 0x%x\n", chunkLen)
			r.err = ErrUnsupported
			return 0, r.err
		}

		// fmt.Printf("skippable: ID: 0x%x, len: 0x%x\n", chunkType, chunkLen)
		if !r.skippable(r.buf, chunkLen, false, chunkType) {
			return 0, r.err
		}
	}
	// The loop ended because a goroutine recorded an error; the deferred
	// function fills in the named results.
	return 0, r.err
}
// Skip will skip n bytes forward in the decompressed output.
// For larger skips this consumes less CPU and is faster than reading output and discarding it.
// CRC is not checked on skipped blocks.
@ -454,7 +720,11 @@ func (r *Reader) Skip(n int64) error {
// decoded[i:j] contains decoded bytes that have not yet been passed on.
left := int64(r.j - r.i)
if left >= n {
r.i += int(n)
tmp := int64(r.i) + n
if tmp > math.MaxInt32 {
return errors.New("s2: internal overflow in skip")
}
r.i = int(tmp)
return nil
}
n -= int64(r.j - r.i)
@ -526,6 +796,7 @@ func (r *Reader) Skip(n int64) error {
} else {
// Skip block completely
n -= int64(dLen)
r.blockStart += int64(dLen)
dLen = 0
}
r.i, r.j = 0, dLen
@ -656,6 +927,15 @@ func (r *Reader) ReadSeeker(random bool, index []byte) (*ReadSeeker, error) {
err = r.index.LoadStream(rs)
if err != nil {
if err == ErrUnsupported {
// If we don't require random seeking, reset input and return.
if !random {
_, err = rs.Seek(pos, io.SeekStart)
if err != nil {
return nil, ErrCantSeek{Reason: "resetting stream returned: " + err.Error()}
}
r.index = nil
return &ReadSeeker{Reader: r}, nil
}
return nil, ErrCantSeek{Reason: "input stream does not contain an index"}
}
return nil, ErrCantSeek{Reason: "reading index returned: " + err.Error()}
@ -699,8 +979,16 @@ func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
case io.SeekCurrent:
offset += r.blockStart + int64(r.i)
case io.SeekEnd:
offset = -offset
if offset > 0 {
return 0, errors.New("seek after end of file")
}
offset = r.index.TotalUncompressed + offset
}
if offset < 0 {
return 0, errors.New("seek before start of file")
}
c, u, err := r.index.Find(offset)
if err != nil {
return r.blockStart + int64(r.i), err
@ -712,10 +1000,6 @@ func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
return 0, err
}
if offset < 0 {
offset = r.index.TotalUncompressed + offset
}
r.i = r.j // Remove rest of current block.
if u < offset {
// Forward inside block

View File

@ -1119,12 +1119,6 @@ func (w *Writer) closeIndex(idx bool) ([]byte, error) {
if w.appendIndex {
w.written += int64(len(index))
}
if true {
_, err := w.index.Load(index)
if err != nil {
panic(err)
}
}
}
if w.pad > 1 {

View File

@ -370,7 +370,7 @@ func encodeBlockBestSnappy(dst, src []byte) (d int) {
}
offset := m.s - m.offset
return score - emitCopySize(offset, m.length)
return score - emitCopyNoRepeatSize(offset, m.length)
}
matchAt := func(offset, s int, first uint32) match {
@ -567,6 +567,10 @@ func emitCopySize(offset, length int) int {
// Offset no more than 2 bytes.
if length > 64 {
if offset < 2048 {
// Emit 8 bytes, then rest as repeats...
return 2 + emitRepeatSize(offset, length-8)
}
// Emit remaining as repeats, at least 4 bytes remain.
return 3 + emitRepeatSize(offset, length-60)
}
@ -577,6 +581,28 @@ func emitCopySize(offset, length int) int {
return 2
}
// emitCopyNoRepeatSize returns the size used for encoding a copy of the
// given offset and length when no repeat codes are emitted.
//
// It assumes that:
//
//	1 <= offset && offset <= math.MaxUint32
//	4 <= length && length <= 1 << 24
func emitCopyNoRepeatSize(offset, length int) int {
	switch {
	case offset >= 65536:
		// 5 bytes per copy, counted once plus once per 64 bytes of length.
		return 5 * (1 + length/64)
	case length > 64:
		// Offset fits in 2 bytes: 3 bytes per copy, counted once plus once
		// per 60 bytes of length.
		return 3 * (1 + length/60)
	case length >= 12, offset >= 2048:
		// Single copy encoded as 3 bytes.
		return 3
	default:
		// Single copy encoded as 2 bytes.
		return 2
	}
}
// emitRepeatSize returns the number of bytes required to encode a repeat.
// Length must be at least 4 and < 1<<24
func emitRepeatSize(offset, length int) int {

View File

@ -180,14 +180,23 @@ func emitCopy(dst []byte, offset, length int) int {
// Offset no more than 2 bytes.
if length > 64 {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
off := 3
if offset < 2048 {
// emit 8 bytes as tagCopy1, rest as repeats.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
length -= 8
off = 2
} else {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
}
// Emit remaining as repeats, at least 4 bytes remain.
return 3 + emitRepeat(dst[3:], offset, length)
return off + emitRepeat(dst[off:], offset, length)
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.

View File

@ -5,6 +5,8 @@
package s2
func _dummy_()
// encodeBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4294967295 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.

File diff suppressed because it is too large Load Diff

View File

@ -10,6 +10,7 @@ import (
"encoding/json"
"fmt"
"io"
"sort"
)
const (
@ -100,6 +101,15 @@ func (i *Index) Find(offset int64) (compressedOff, uncompressedOff int64, err er
if offset > i.TotalUncompressed {
return 0, 0, io.ErrUnexpectedEOF
}
if len(i.info) > 200 {
n := sort.Search(len(i.info), func(n int) bool {
return i.info[n].uncompressedOffset > offset
})
if n == 0 {
n = 1
}
return i.info[n-1].compressedOffset, i.info[n-1].uncompressedOffset, nil
}
for _, info := range i.info {
if info.uncompressedOffset > offset {
break
@ -523,3 +533,66 @@ func (i *Index) JSON() []byte {
b, _ := json.MarshalIndent(x, "", " ")
return b
}
// RemoveIndexHeaders will trim all headers and trailers from a given index.
// The 4 byte chunk header, S2IndexHeader, the 4 bytes preceding the trailer
// and S2IndexTrailer are removed.
// These can be restored using RestoreIndexHeaders.
// This removes a layer of security, but is the most compact representation.
// Returns nil if the headers contain errors.
// The returned slice references the provided slice.
func RemoveIndexHeaders(b []byte) []byte {
	const save = 4 + len(S2IndexHeader) + len(S2IndexTrailer) + 4
	if len(b) <= save {
		return nil
	}
	if b[0] != ChunkTypeIndex {
		return nil
	}
	// 24 bit little endian chunk length follows the chunk type.
	chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16
	b = b[4:]

	// Validate we have enough...
	if len(b) < chunkLen {
		return nil
	}
	b = b[:chunkLen]

	// HasPrefix also rejects chunks shorter than the header. Slicing
	// b[:len(S2IndexHeader)] would silently read past chunkLen into the
	// slice capacity and the following re-slice would then panic.
	if !bytes.HasPrefix(b, []byte(S2IndexHeader)) {
		return nil
	}
	b = b[len(S2IndexHeader):]
	if !bytes.HasSuffix(b, []byte(S2IndexTrailer)) {
		return nil
	}
	b = bytes.TrimSuffix(b, []byte(S2IndexTrailer))

	// Drop the 4 bytes stored in front of the trailer.
	if len(b) < 4 {
		return nil
	}
	return b[:len(b)-4]
}
// RestoreIndexHeaders will restore the index headers removed by RemoveIndexHeaders.
// No error checking is performed on the input.
// If a 0 length slice is sent, it is returned without modification.
func RestoreIndexHeaders(in []byte) []byte {
	if len(in) == 0 {
		return in
	}
	total := 4 + len(S2IndexHeader) + len(in) + len(S2IndexTrailer) + 4
	out := make([]byte, 4, total)
	// Chunk type; the 3 length bytes are filled in once the size is known.
	out[0] = ChunkTypeIndex
	out = append(out, S2IndexHeader...)
	out = append(out, in...)

	// Total size of the restored index, stored little endian before the trailer.
	var size [4]byte
	binary.LittleEndian.PutUint32(size[:], uint32(len(out)+len(size)+len(S2IndexTrailer)))
	out = append(out, size[:]...)

	// Trailer
	out = append(out, S2IndexTrailer...)

	// 24 bit little endian chunk length, excluding the frame header.
	payloadLen := len(out) - skippableFrameHeader
	out[1] = uint8(payloadLen)
	out[2] = uint8(payloadLen >> 8)
	out[3] = uint8(payloadLen >> 16)
	return out
}

View File

@ -1,46 +0,0 @@
language: go
os:
- linux
- osx
- windows
arch:
- amd64
- arm64
go:
- 1.12.x
- 1.13.x
- 1.14.x
- master
script:
- go vet ./...
- go test -race ./...
- go test -tags=noasm ./...
stages:
- gofmt
- test
matrix:
allow_failures:
- go: 'master'
fast_finish: true
include:
- stage: gofmt
go: 1.14.x
os: linux
arch: amd64
script:
- diff <(gofmt -d .) <(printf "")
- diff <(gofmt -d ./private) <(printf "")
- go install github.com/klauspost/asmfmt/cmd/asmfmt
- diff <(asmfmt -d .) <(printf "")
- stage: i386
go: 1.14.x
os: linux
arch: amd64
script:
- GOOS=linux GOARCH=386 go test .

View File

@ -1,191 +0,0 @@
# cpuid
Package cpuid provides information about the CPU running the current program.
CPU features are detected on startup, and kept for fast access through the life of the application.
Currently x86 / x64 (AMD64/i386) and ARM (ARM64) are supported, and no external C (cgo) code is used, which should make the library very easy to use.
You can access the CPU information by accessing the shared CPU variable of the cpuid library.
Package home: https://github.com/klauspost/cpuid
[![GoDoc][1]][2] [![Build Status][3]][4]
[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg
[2]: https://godoc.org/github.com/klauspost/cpuid
[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
[4]: https://travis-ci.org/klauspost/cpuid
# features
## x86 CPU Instructions
* **CMOV** (i686 CMOV)
* **NX** (NX (No-Execute) bit)
* **AMD3DNOW** (AMD 3DNOW)
* **AMD3DNOWEXT** (AMD 3DNowExt)
* **MMX** (standard MMX)
* **MMXEXT** (SSE integer functions or AMD MMX ext)
* **SSE** (SSE functions)
* **SSE2** (P4 SSE functions)
* **SSE3** (Prescott SSE3 functions)
* **SSSE3** (Conroe SSSE3 functions)
* **SSE4** (Penryn SSE4.1 functions)
* **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
* **SSE42** (Nehalem SSE4.2 functions)
* **AVX** (AVX functions)
* **AVX2** (AVX2 functions)
* **FMA3** (Intel FMA 3)
* **FMA4** (Bulldozer FMA4 functions)
* **XOP** (Bulldozer XOP functions)
* **F16C** (Half-precision floating-point conversion)
* **BMI1** (Bit Manipulation Instruction Set 1)
* **BMI2** (Bit Manipulation Instruction Set 2)
* **TBM** (AMD Trailing Bit Manipulation)
* **LZCNT** (LZCNT instruction)
* **POPCNT** (POPCNT instruction)
* **AESNI** (Advanced Encryption Standard New Instructions)
* **CLMUL** (Carry-less Multiplication)
* **HTT** (Hyperthreading (enabled))
* **HLE** (Hardware Lock Elision)
* **RTM** (Restricted Transactional Memory)
* **RDRAND** (RDRAND instruction is available)
* **RDSEED** (RDSEED instruction is available)
* **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
* **SHA** (Intel SHA Extensions)
* **AVX512F** (AVX-512 Foundation)
* **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
* **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
* **AVX512PF** (AVX-512 Prefetch Instructions)
* **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
* **AVX512CD** (AVX-512 Conflict Detection Instructions)
* **AVX512BW** (AVX-512 Byte and Word Instructions)
* **AVX512VL** (AVX-512 Vector Length Extensions)
* **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
* **AVX512VBMI2** (AVX-512 Vector Bit Manipulation Instructions, Version 2)
* **AVX512VNNI** (AVX-512 Vector Neural Network Instructions)
* **AVX512VPOPCNTDQ** (AVX-512 Vector Population Count Doubleword and Quadword)
* **GFNI** (Galois Field New Instructions)
* **VAES** (Vector AES)
* **AVX512BITALG** (AVX-512 Bit Algorithms)
* **VPCLMULQDQ** (Carry-Less Multiplication Quadword)
* **AVX512BF16** (AVX-512 BFLOAT16 Instructions)
* **AVX512VP2INTERSECT** (AVX-512 Intersect for D/Q)
* **MPX** (Intel MPX (Memory Protection Extensions))
* **ERMS** (Enhanced REP MOVSB/STOSB)
* **RDTSCP** (RDTSCP Instruction)
* **CX16** (CMPXCHG16B Instruction)
* **SGX** (Software Guard Extensions, with activation details)
* **VMX** (Virtual Machine Extensions)
## Performance
* **RDTSCP()** Returns current cycle count. Can be used for benchmarking.
* **SSE2SLOW** (SSE2 is supported, but usually not faster)
* **SSE3SLOW** (SSE3 is supported, but usually not faster)
* **ATOM** (Atom processor, some SSSE3 instructions are slower)
* **Cache line** (Probable size of a cache line).
* **L1, L2, L3 Cache size** on newer Intel/AMD CPUs.
## ARM CPU features
# ARM FEATURE DETECTION DISABLED!
See [#52](https://github.com/klauspost/cpuid/issues/52).
Currently only `arm64` platforms are implemented.
* **FP** Single-precision and double-precision floating point
* **ASIMD** Advanced SIMD
* **EVTSTRM** Generic timer
* **AES** AES instructions
* **PMULL** Polynomial Multiply instructions (PMULL/PMULL2)
* **SHA1** SHA-1 instructions (SHA1C, etc)
* **SHA2** SHA-2 instructions (SHA256H, etc)
* **CRC32** CRC32/CRC32C instructions
* **ATOMICS** Large System Extensions (LSE)
* **FPHP** Half-precision floating point
* **ASIMDHP** Advanced SIMD half-precision floating point
* **ARMCPUID** Some CPU ID registers readable at user-level
* **ASIMDRDM** Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
* **JSCVT** Javascript-style double->int convert (FJCVTZS)
* **FCMA** Floating point complex number addition and multiplication
* **LRCPC** Weaker release consistency (LDAPR, etc)
* **DCPOP** Data cache clean to Point of Persistence (DC CVAP)
* **SHA3** SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
* **SM3** SM3 instructions
* **SM4** SM4 instructions
* **ASIMDDP** SIMD Dot Product
* **SHA512** SHA512 instructions
* **SVE** Scalable Vector Extension
* **GPA** Generic Pointer Authentication
## Cpu Vendor/VM
* **Intel**
* **AMD**
* **VIA**
* **Transmeta**
* **NSC**
* **KVM** (Kernel-based Virtual Machine)
* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
* **VMware**
* **XenHVM**
* **Bhyve**
* **Hygon**
# installing
```go get github.com/klauspost/cpuid```
# example
```Go
package main
import (
"fmt"
"github.com/klauspost/cpuid"
)
func main() {
// Print basic CPU information:
fmt.Println("Name:", cpuid.CPU.BrandName)
fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore)
fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores)
fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
fmt.Println("Features:", cpuid.CPU.Features)
fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes")
fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
// Test if we have a specific feature:
if cpuid.CPU.SSE() {
fmt.Println("We have Streaming SIMD Extensions")
}
}
```
Sample output:
```
>go run main.go
Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz
PhysicalCores: 2
ThreadsPerCore: 2
LogicalCores: 4
Family 6 Model: 42
Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL
Cacheline bytes: 64
We have Streaming SIMD Extensions
```
# private package
In the "private" folder you can find an autogenerated version of the library you can include in your own packages.
For this purpose all exports are removed, and functions and constants are lowercased.
This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages.
# license
This code is published under an MIT license. See LICENSE file for more information.

File diff suppressed because it is too large Load Diff

74
vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml generated vendored Normal file
View File

@ -0,0 +1,74 @@
# This is an example goreleaser.yaml file with some sane defaults.
# Make sure to check the documentation at http://goreleaser.com
builds:
-
id: "cpuid"
binary: cpuid
main: ./cmd/cpuid/main.go
env:
- CGO_ENABLED=0
flags:
- -ldflags=-s -w
goos:
- aix
- linux
- freebsd
- netbsd
- windows
- darwin
goarch:
- 386
- amd64
- arm64
goarm:
- 7
archives:
-
id: cpuid
name_template: "cpuid-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
replacements:
aix: AIX
darwin: OSX
linux: Linux
windows: Windows
386: i386
amd64: x86_64
freebsd: FreeBSD
netbsd: NetBSD
format_overrides:
- goos: windows
format: zip
files:
- LICENSE
checksum:
name_template: 'checksums.txt'
snapshot:
name_template: "{{ .Tag }}-next"
changelog:
sort: asc
filters:
exclude:
- '^doc:'
- '^docs:'
- '^test:'
- '^tests:'
- '^Update\sREADME.md'
nfpms:
-
file_name_template: "cpuid_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
vendor: Klaus Post
homepage: https://github.com/klauspost/cpuid
maintainer: Klaus Post <klauspost@gmail.com>
description: CPUID Tool
license: BSD 3-Clause
formats:
- deb
- rpm
replacements:
darwin: Darwin
linux: Linux
freebsd: FreeBSD
amd64: x86_64

258
vendor/github.com/klauspost/cpuid/v2/README.md generated vendored Normal file
View File

@ -0,0 +1,258 @@
# cpuid
Package cpuid provides information about the CPU running the current program.
CPU features are detected on startup, and kept for fast access through the life of the application.
Currently x86 / x64 (AMD64/i386) and ARM (ARM64) are supported, and no external C (cgo) code is used, which should make the library very easy to use.
You can access the CPU information by accessing the shared CPU variable of the cpuid library.
Package home: https://github.com/klauspost/cpuid
[![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
[![Build Status][3]][4]
[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
[4]: https://travis-ci.org/klauspost/cpuid
## installing
`go get -u github.com/klauspost/cpuid/v2` using modules.
Drop `v2` for others.
## example
```Go
package main
import (
"fmt"
"strings"
. "github.com/klauspost/cpuid/v2"
)
func main() {
// Print basic CPU information:
fmt.Println("Name:", CPU.BrandName)
fmt.Println("PhysicalCores:", CPU.PhysicalCores)
fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore)
fmt.Println("LogicalCores:", CPU.LogicalCores)
fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID)
fmt.Println("Features:", strings.Join(CPU.FeatureSet(), ","))
fmt.Println("Cacheline bytes:", CPU.CacheLine)
fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes")
fmt.Println("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes")
fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes")
fmt.Println("Frequency", CPU.Hz, "hz")
// Test if we have these specific features:
if CPU.Supports(SSE, SSE2) {
fmt.Println("We have Streaming SIMD 2 Extensions")
}
}
```
Sample output:
```
>go run main.go
Name: AMD Ryzen 9 3950X 16-Core Processor
PhysicalCores: 16
ThreadsPerCore: 2
LogicalCores: 32
Family 23 Model: 113 Vendor ID: AMD
Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CMOV,CX16,F16C,FMA3,HTT,HYPERVISOR,LZCNT,MMX,MMXEXT,NX,POPCNT,RDRAND,RDSEED,RDTSCP,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3
Cacheline bytes: 64
L1 Data Cache: 32768 bytes
L1 Instruction Cache: 32768 bytes
L2 Cache: 524288 bytes
L3 Cache: 16777216 bytes
Frequency 0 hz
We have Streaming SIMD 2 Extensions
```
# usage
The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features.
A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler.
Note that for some cpu/os combinations some features will not be detected.
`amd64` has rather good support and should work reliably on all platforms.
Note that hypervisors may not pass through all CPU features.
## arm64 feature detection
Not all operating systems provide ARM features directly
and there is no safe way to do so for the rest.
Currently `arm64/linux` and `arm64/freebsd` should be quite reliable.
`arm64/darwin` adds features expected from the M1 processor, but a lot remains undetected.
A `DetectARM()` can be used if you are able to control your deployment,
it will detect CPU features, but may crash if the OS doesn't intercept the calls.
A `-cpu.arm` flag for detecting unsafe ARM features can be added. See below.
Note that currently only features are detected on ARM,
no additional information is currently available.
## flags
It is possible to add flags that affect CPU detection.
For this the `Flags()` command is provided.
`Flags()` must be called *before* `flag.Parse()`, and `Detect()` must be called after the flags have been parsed.
This means that any detection used in `init()` functions will not contain these flags.
Example:
```Go
package main
import (
"flag"
"fmt"
"strings"
"github.com/klauspost/cpuid/v2"
)
func main() {
cpuid.Flags()
flag.Parse()
cpuid.Detect()
// Test if we have these specific features:
if cpuid.CPU.Supports(cpuid.SSE, cpuid.SSE2) {
fmt.Println("We have Streaming SIMD 2 Extensions")
}
}
```
## commandline
Download as binary from: https://github.com/klauspost/cpuid/releases
Install from source:
`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
### Example
```
λ cpuid
Name: AMD Ryzen 9 3950X 16-Core Processor
Vendor String: AuthenticAMD
Vendor ID: AMD
PhysicalCores: 16
Threads Per Core: 2
Logical Cores: 32
CPU Family 23 Model: 113
Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CLZERO,CMOV,CMPXCHG8,CPBOOST,CX16,F16C,FMA3,FXSR,FXSROPT,HTT,HYPERVISOR,LAHF,LZCNT,MCAOVERFLOW,MMX,MMXEXT,MOVBE,NX,OSXSAVE,POPCNT,RDRAND,RDSEED,RDTSCP,SCE,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3,SUCCOR,X87,XSAVE
Microarchitecture level: 3
Cacheline bytes: 64
L1 Instruction Cache: 32768 bytes
L1 Data Cache: 32768 bytes
L2 Cache: 524288 bytes
L3 Cache: 16777216 bytes
```
### JSON Output:
```
λ cpuid --json
{
"BrandName": "AMD Ryzen 9 3950X 16-Core Processor",
"VendorID": 2,
"VendorString": "AuthenticAMD",
"PhysicalCores": 16,
"ThreadsPerCore": 2,
"LogicalCores": 32,
"Family": 23,
"Model": 113,
"CacheLine": 64,
"Hz": 0,
"BoostFreq": 0,
"Cache": {
"L1I": 32768,
"L1D": 32768,
"L2": 524288,
"L3": 16777216
},
"SGX": {
"Available": false,
"LaunchControl": false,
"SGX1Supported": false,
"SGX2Supported": false,
"MaxEnclaveSizeNot64": 0,
"MaxEnclaveSize64": 0,
"EPCSections": null
},
"Features": [
"ADX",
"AESNI",
"AVX",
"AVX2",
"BMI1",
"BMI2",
"CLMUL",
"CLZERO",
"CMOV",
"CMPXCHG8",
"CPBOOST",
"CX16",
"F16C",
"FMA3",
"FXSR",
"FXSROPT",
"HTT",
"HYPERVISOR",
"LAHF",
"LZCNT",
"MCAOVERFLOW",
"MMX",
"MMXEXT",
"MOVBE",
"NX",
"OSXSAVE",
"POPCNT",
"RDRAND",
"RDSEED",
"RDTSCP",
"SCE",
"SHA",
"SSE",
"SSE2",
"SSE3",
"SSE4",
"SSE42",
"SSE4A",
"SSSE3",
"SUCCOR",
"X87",
"XSAVE"
],
"X64Level": 3
}
```
### Check CPU microarch level
```
λ cpuid --check-level=3
2022/03/18 17:04:40 AMD Ryzen 9 3950X 16-Core Processor
2022/03/18 17:04:40 Microarchitecture level 3 is supported. Max level is 3.
Exit Code 0
λ cpuid --check-level=4
2022/03/18 17:06:18 AMD Ryzen 9 3950X 16-Core Processor
2022/03/18 17:06:18 Microarchitecture level 4 not supported. Max level is 3.
Exit Code 1
```
# license
This code is published under an MIT license. See LICENSE file for more information.

1291
vendor/github.com/klauspost/cpuid/v2/cpuid.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -40,3 +40,8 @@ TEXT ·asmRdtscpAsm(SB), 7, $0
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET
// func asmDarwinHasAVX512() bool
//
// 386 variant: no commpage probing is done here, so the function
// unconditionally reports false. The result is a single byte named ret,
// matching the Go prototype and the amd64 implementation of the same symbol.
TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
	MOVB $0, ret+0(FP) // bool result occupies one byte
	RET

View File

@ -40,3 +40,33 @@ TEXT ·asmRdtscpAsm(SB), 7, $0
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET
// From https://go-review.googlesource.com/c/sys/+/285572/
// func asmDarwinHasAVX512() bool
//
// Reports whether the Darwin commpage advertises AVX-512F.
// The result is preset to false and is only changed to true when the code is
// assembled with both GOOS_darwin and GOARCH_amd64 defined, the commpage
// version is at least 13, and the hasAVX512F bit is set in the 64-bit
// capabilities word.
TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
MOVB $0, ret+0(FP) // default to false
#ifdef GOOS_darwin // the probe below is assembled only for darwin
#ifdef GOARCH_amd64 // the probe below is assembled only for amd64
// These values from:
// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
#define commpage64_base_address 0x00007fffffe00000
#define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
#define commpage64_version (commpage64_base_address+0x01E)
#define hasAVX512F 0x0000004000000000
// Load the 16-bit commpage version and bail out if it is too old.
MOVQ $commpage64_version, BX
MOVW (BX), AX
CMPW AX, $13 // versions < 13 do not support AVX512
JL no_avx512
// Load the 64-bit capabilities word and test the AVX512F bit.
MOVQ $commpage64_cpu_capabilities64, BX
MOVQ (BX), AX
MOVQ $hasAVX512F, CX
ANDQ CX, AX
JZ no_avx512
MOVB $1, ret+0(FP)
no_avx512:
#endif
#endif
RET

View File

@ -1,6 +1,6 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build arm64,!gccgo
//+build arm64,!gccgo,!noasm,!appengine
// See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt

View File

@ -1,9 +1,12 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build arm64,!gccgo,!noasm,!appengine
//go:build arm64 && !gccgo && !noasm && !appengine
// +build arm64,!gccgo,!noasm,!appengine
package cpuid
import "runtime"
// getMidr has no Go body (implemented outside this file, presumably in
// assembly). addInfo decodes its result as MIDR_EL1, the Main ID Register.
func getMidr() (midr uint64)
// getProcFeatures has no Go body (presumably assembly). addInfo interprets its
// result using the ID_AA64PFR0_EL1 bit layout.
func getProcFeatures() (procFeatures uint64)
// getInstAttributes has no Go body (presumably assembly). addInfo interprets
// the two results using the ID_AA64ISAR0_EL1 and ID_AA64ISAR1_EL1 bit layouts
// respectively.
func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
@ -15,14 +18,19 @@ func initCPU() {
rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
}
func addInfo(c *CPUInfo) {
// ARM64 disabled for now.
if true {
func addInfo(c *CPUInfo, safe bool) {
// Seems to be safe to assume on ARM64
c.CacheLine = 64
detectOS(c)
// ARM64 disabled since it may crash if interrupt is not intercepted by OS.
if safe && !c.Supports(ARMCPUID) && runtime.GOOS != "freebsd" {
return
}
// midr := getMidr()
midr := getMidr()
// MIDR_EL1 - Main ID Register
// https://developer.arm.com/docs/ddi0595/h/aarch64-system-registers/midr_el1
// x--------------------------------------------------x
// | Name | bits | visible |
// |--------------------------------------------------|
@ -37,11 +45,70 @@ func addInfo(c *CPUInfo) {
// | Revision | [3-0] | y |
// x--------------------------------------------------x
// fmt.Printf(" implementer: 0x%02x\n", (midr>>24)&0xff)
// fmt.Printf(" variant: 0x%01x\n", (midr>>20)&0xf)
// fmt.Printf("architecture: 0x%01x\n", (midr>>16)&0xf)
// fmt.Printf(" part num: 0x%03x\n", (midr>>4)&0xfff)
// fmt.Printf(" revision: 0x%01x\n", (midr>>0)&0xf)
switch (midr >> 24) & 0xff {
case 0xC0:
c.VendorString = "Ampere Computing"
c.VendorID = Ampere
case 0x41:
c.VendorString = "Arm Limited"
c.VendorID = ARM
case 0x42:
c.VendorString = "Broadcom Corporation"
c.VendorID = Broadcom
case 0x43:
c.VendorString = "Cavium Inc"
c.VendorID = Cavium
case 0x44:
c.VendorString = "Digital Equipment Corporation"
c.VendorID = DEC
case 0x46:
c.VendorString = "Fujitsu Ltd"
c.VendorID = Fujitsu
case 0x49:
c.VendorString = "Infineon Technologies AG"
c.VendorID = Infineon
case 0x4D:
c.VendorString = "Motorola or Freescale Semiconductor Inc"
c.VendorID = Motorola
case 0x4E:
c.VendorString = "NVIDIA Corporation"
c.VendorID = NVIDIA
case 0x50:
c.VendorString = "Applied Micro Circuits Corporation"
c.VendorID = AMCC
case 0x51:
c.VendorString = "Qualcomm Inc"
c.VendorID = Qualcomm
case 0x56:
c.VendorString = "Marvell International Ltd"
c.VendorID = Marvell
case 0x69:
c.VendorString = "Intel Corporation"
c.VendorID = Intel
}
// Lower 4 bits: Architecture
// Architecture Meaning
// 0b0001 Armv4.
// 0b0010 Armv4T.
// 0b0011 Armv5 (obsolete).
// 0b0100 Armv5T.
// 0b0101 Armv5TE.
// 0b0110 Armv5TEJ.
// 0b0111 Armv6.
// 0b1111 Architectural features are individually identified in the ID_* registers, see 'ID registers'.
// Upper 4 bit: Variant
// An IMPLEMENTATION DEFINED variant number.
// Typically, this field is used to distinguish between different product variants, or major revisions of a product.
c.Family = int(midr>>16) & 0xff
// PartNum, bits [15:4]
// An IMPLEMENTATION DEFINED primary part number for the device.
// On processors implemented by Arm, if the top four bits of the primary
// part number are 0x0 or 0x7, the variant and architecture are encoded differently.
// Revision, bits [3:0]
// An IMPLEMENTATION DEFINED revision number for the device.
c.Model = int(midr) & 0xffff
procFeatures := getProcFeatures()
@ -68,25 +135,18 @@ func addInfo(c *CPUInfo) {
// | EL0 | [3-0] | n |
// x--------------------------------------------------x
var f ArmFlags
var f flagSet
// if procFeatures&(0xf<<48) != 0 {
// fmt.Println("DIT")
// }
if procFeatures&(0xf<<32) != 0 {
f |= SVE
}
f.setIf(procFeatures&(0xf<<32) != 0, SVE)
if procFeatures&(0xf<<20) != 15<<20 {
f |= ASIMD
if procFeatures&(0xf<<20) == 1<<20 {
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
// 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
f |= FPHP
f |= ASIMDHP
}
}
if procFeatures&(0xf<<16) != 0 {
f |= FP
f.set(ASIMD)
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
// 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
f.setIf(procFeatures&(0xf<<20) == 1<<20, FPHP, ASIMDHP)
}
f.setIf(procFeatures&(0xf<<16) != 0, FP)
instAttrReg0, instAttrReg1 := getInstAttributes()
@ -127,46 +187,22 @@ func addInfo(c *CPUInfo) {
// if instAttrReg0&(0xf<<48) != 0 {
// fmt.Println("FHM")
// }
if instAttrReg0&(0xf<<44) != 0 {
f |= ASIMDDP
}
if instAttrReg0&(0xf<<40) != 0 {
f |= SM4
}
if instAttrReg0&(0xf<<36) != 0 {
f |= SM3
}
if instAttrReg0&(0xf<<32) != 0 {
f |= SHA3
}
if instAttrReg0&(0xf<<28) != 0 {
f |= ASIMDRDM
}
if instAttrReg0&(0xf<<20) != 0 {
f |= ATOMICS
}
if instAttrReg0&(0xf<<16) != 0 {
f |= CRC32
}
if instAttrReg0&(0xf<<12) != 0 {
f |= SHA2
}
if instAttrReg0&(0xf<<12) == 2<<12 {
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
// 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
f |= SHA512
}
if instAttrReg0&(0xf<<8) != 0 {
f |= SHA1
}
if instAttrReg0&(0xf<<4) != 0 {
f |= AES
}
if instAttrReg0&(0xf<<4) == 2<<4 {
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
// 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
f |= PMULL
}
f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP)
f.setIf(instAttrReg0&(0xf<<40) != 0, SM4)
f.setIf(instAttrReg0&(0xf<<36) != 0, SM3)
f.setIf(instAttrReg0&(0xf<<32) != 0, SHA3)
f.setIf(instAttrReg0&(0xf<<28) != 0, ASIMDRDM)
f.setIf(instAttrReg0&(0xf<<20) != 0, ATOMICS)
f.setIf(instAttrReg0&(0xf<<16) != 0, CRC32)
f.setIf(instAttrReg0&(0xf<<12) != 0, SHA2)
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
// 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
f.setIf(instAttrReg0&(0xf<<12) == 2<<12, SHA512)
f.setIf(instAttrReg0&(0xf<<8) != 0, SHA1)
f.setIf(instAttrReg0&(0xf<<4) != 0, AESARM)
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
// 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
f.setIf(instAttrReg0&(0xf<<4) == 2<<4, PMULL)
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1
//
@ -194,26 +230,18 @@ func addInfo(c *CPUInfo) {
// if instAttrReg1&(0xf<<28) != 0 {
// fmt.Println("GPI")
// }
if instAttrReg1&(0xf<<28) != 24 {
f |= GPA
}
if instAttrReg1&(0xf<<20) != 0 {
f |= LRCPC
}
if instAttrReg1&(0xf<<16) != 0 {
f |= FCMA
}
if instAttrReg1&(0xf<<12) != 0 {
f |= JSCVT
}
f.setIf(instAttrReg1&(0xf<<28) != 24, GPA)
f.setIf(instAttrReg1&(0xf<<20) != 0, LRCPC)
f.setIf(instAttrReg1&(0xf<<16) != 0, FCMA)
f.setIf(instAttrReg1&(0xf<<12) != 0, JSCVT)
// if instAttrReg1&(0xf<<8) != 0 {
// fmt.Println("API")
// }
// if instAttrReg1&(0xf<<4) != 0 {
// fmt.Println("APA")
// }
if instAttrReg1&(0xf<<0) != 0 {
f |= DCPOP
}
c.Arm = f
f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP)
// Store
c.featureSet.or(f)
}

View File

@ -1,6 +1,7 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build !amd64,!386,!arm64 gccgo noasm appengine
//go:build (!amd64 && !386 && !arm64) || gccgo || noasm || appengine
// +build !amd64,!386,!arm64 gccgo noasm appengine
package cpuid
@ -11,4 +12,4 @@ func initCPU() {
rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
}
func addInfo(info *CPUInfo) {}
func addInfo(info *CPUInfo, safe bool) {}

View File

@ -1,6 +1,7 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build 386,!gccgo,!noasm amd64,!gccgo,!noasm,!appengine
//go:build (386 && !gccgo && !noasm && !appengine) || (amd64 && !gccgo && !noasm && !appengine)
// +build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine
package cpuid
@ -8,26 +9,28 @@ func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// asmCpuidex has no Go body (presumably implemented in the package's assembly
// files); initCPU installs it as the cpuidex hook.
func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// asmXgetbv has no Go body (presumably assembly); initCPU installs it as the
// xgetbv hook.
func asmXgetbv(index uint32) (eax, edx uint32)
// asmRdtscpAsm has no Go body (presumably assembly); initCPU installs it as
// the rdtscpAsm hook.
func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// asmDarwinHasAVX512 is implemented in assembly and reports whether AVX-512 is
// advertised by the OS on Darwin; initCPU installs it as the darwinHasAVX512
// hook.
func asmDarwinHasAVX512() bool
// initCPU points the package-level CPU query hooks at their assembly
// implementations.
func initCPU() {
	cpuid, cpuidex = asmCpuid, asmCpuidex
	xgetbv, rdtscpAsm = asmXgetbv, asmRdtscpAsm
	darwinHasAVX512 = asmDarwinHasAVX512
}
func addInfo(c *CPUInfo) {
func addInfo(c *CPUInfo, safe bool) {
c.maxFunc = maxFunctionID()
c.maxExFunc = maxExtendedFunction()
c.BrandName = brandName()
c.CacheLine = cacheLine()
c.Family, c.Model = familyModel()
c.Features = support()
c.SGX = hasSGX(c.Features&SGX != 0, c.Features&SGXLC != 0)
c.Family, c.Model, c.Stepping = familyModel()
c.featureSet = support()
c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
c.ThreadsPerCore = threadsPerCore()
c.LogicalCores = logicalCores()
c.PhysicalCores = physicalCores()
c.VendorID, c.VendorString = vendorID()
c.Hz = hertz(c.BrandName)
c.cacheSize()
c.frequencies()
}

View File

@ -0,0 +1,235 @@
// Code generated by "stringer -type=FeatureID,Vendor"; DO NOT EDIT.
package cpuid
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[ADX-1]
_ = x[AESNI-2]
_ = x[AMD3DNOW-3]
_ = x[AMD3DNOWEXT-4]
_ = x[AMXBF16-5]
_ = x[AMXINT8-6]
_ = x[AMXTILE-7]
_ = x[AVX-8]
_ = x[AVX2-9]
_ = x[AVX512BF16-10]
_ = x[AVX512BITALG-11]
_ = x[AVX512BW-12]
_ = x[AVX512CD-13]
_ = x[AVX512DQ-14]
_ = x[AVX512ER-15]
_ = x[AVX512F-16]
_ = x[AVX512FP16-17]
_ = x[AVX512IFMA-18]
_ = x[AVX512PF-19]
_ = x[AVX512VBMI-20]
_ = x[AVX512VBMI2-21]
_ = x[AVX512VL-22]
_ = x[AVX512VNNI-23]
_ = x[AVX512VP2INTERSECT-24]
_ = x[AVX512VPOPCNTDQ-25]
_ = x[AVXSLOW-26]
_ = x[AVXVNNI-27]
_ = x[BMI1-28]
_ = x[BMI2-29]
_ = x[CETIBT-30]
_ = x[CETSS-31]
_ = x[CLDEMOTE-32]
_ = x[CLMUL-33]
_ = x[CLZERO-34]
_ = x[CMOV-35]
_ = x[CMPSB_SCADBS_SHORT-36]
_ = x[CMPXCHG8-37]
_ = x[CPBOOST-38]
_ = x[CX16-39]
_ = x[ENQCMD-40]
_ = x[ERMS-41]
_ = x[F16C-42]
_ = x[FMA3-43]
_ = x[FMA4-44]
_ = x[FXSR-45]
_ = x[FXSROPT-46]
_ = x[GFNI-47]
_ = x[HLE-48]
_ = x[HRESET-49]
_ = x[HTT-50]
_ = x[HWA-51]
_ = x[HYPERVISOR-52]
_ = x[IBPB-53]
_ = x[IBS-54]
_ = x[IBSBRNTRGT-55]
_ = x[IBSFETCHSAM-56]
_ = x[IBSFFV-57]
_ = x[IBSOPCNT-58]
_ = x[IBSOPCNTEXT-59]
_ = x[IBSOPSAM-60]
_ = x[IBSRDWROPCNT-61]
_ = x[IBSRIPINVALIDCHK-62]
_ = x[IBS_PREVENTHOST-63]
_ = x[INT_WBINVD-64]
_ = x[INVLPGB-65]
_ = x[LAHF-66]
_ = x[LAM-67]
_ = x[LBRVIRT-68]
_ = x[LZCNT-69]
_ = x[MCAOVERFLOW-70]
_ = x[MCOMMIT-71]
_ = x[MMX-72]
_ = x[MMXEXT-73]
_ = x[MOVBE-74]
_ = x[MOVDIR64B-75]
_ = x[MOVDIRI-76]
_ = x[MOVSB_ZL-77]
_ = x[MPX-78]
_ = x[MSRIRC-79]
_ = x[MSR_PAGEFLUSH-80]
_ = x[NRIPS-81]
_ = x[NX-82]
_ = x[OSXSAVE-83]
_ = x[PCONFIG-84]
_ = x[POPCNT-85]
_ = x[RDPRU-86]
_ = x[RDRAND-87]
_ = x[RDSEED-88]
_ = x[RDTSCP-89]
_ = x[RTM-90]
_ = x[RTM_ALWAYS_ABORT-91]
_ = x[SERIALIZE-92]
_ = x[SEV-93]
_ = x[SEV_64BIT-94]
_ = x[SEV_ALTERNATIVE-95]
_ = x[SEV_DEBUGSWAP-96]
_ = x[SEV_ES-97]
_ = x[SEV_RESTRICTED-98]
_ = x[SEV_SNP-99]
_ = x[SGX-100]
_ = x[SGXLC-101]
_ = x[SHA-102]
_ = x[SME-103]
_ = x[SME_COHERENT-104]
_ = x[SSE-105]
_ = x[SSE2-106]
_ = x[SSE3-107]
_ = x[SSE4-108]
_ = x[SSE42-109]
_ = x[SSE4A-110]
_ = x[SSSE3-111]
_ = x[STIBP-112]
_ = x[STOSB_SHORT-113]
_ = x[SUCCOR-114]
_ = x[SVM-115]
_ = x[SVMDA-116]
_ = x[SVMFBASID-117]
_ = x[SVML-118]
_ = x[SVMNP-119]
_ = x[SVMPF-120]
_ = x[SVMPFT-121]
_ = x[SYSCALL-122]
_ = x[SYSEE-123]
_ = x[TBM-124]
_ = x[TOPEXT-125]
_ = x[TME-126]
_ = x[TSCRATEMSR-127]
_ = x[TSXLDTRK-128]
_ = x[VAES-129]
_ = x[VMCBCLEAN-130]
_ = x[VMPL-131]
_ = x[VMSA_REGPROT-132]
_ = x[VMX-133]
_ = x[VPCLMULQDQ-134]
_ = x[VTE-135]
_ = x[WAITPKG-136]
_ = x[WBNOINVD-137]
_ = x[X87-138]
_ = x[XGETBV1-139]
_ = x[XOP-140]
_ = x[XSAVE-141]
_ = x[XSAVEC-142]
_ = x[XSAVEOPT-143]
_ = x[XSAVES-144]
_ = x[AESARM-145]
_ = x[ARMCPUID-146]
_ = x[ASIMD-147]
_ = x[ASIMDDP-148]
_ = x[ASIMDHP-149]
_ = x[ASIMDRDM-150]
_ = x[ATOMICS-151]
_ = x[CRC32-152]
_ = x[DCPOP-153]
_ = x[EVTSTRM-154]
_ = x[FCMA-155]
_ = x[FP-156]
_ = x[FPHP-157]
_ = x[GPA-158]
_ = x[JSCVT-159]
_ = x[LRCPC-160]
_ = x[PMULL-161]
_ = x[SHA1-162]
_ = x[SHA2-163]
_ = x[SHA3-164]
_ = x[SHA512-165]
_ = x[SM3-166]
_ = x[SM4-167]
_ = x[SVE-168]
_ = x[lastID-169]
_ = x[firstID-0]
}
const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXSLOWAVXVNNIBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCX16ENQCMDERMSF16CFMA3FMA4FXSRFXSROPTGFNIHLEHRESETHTTHWAHYPERVISORIBPBIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_PREVENTHOSTINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCOMMITMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTOPEXTTMETSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 58, 62, 72, 84, 92, 100, 108, 116, 123, 133, 143, 151, 161, 172, 180, 190, 208, 223, 230, 237, 241, 245, 251, 256, 264, 269, 275, 279, 297, 305, 312, 316, 322, 326, 330, 334, 338, 342, 349, 353, 356, 362, 365, 368, 378, 382, 385, 395, 406, 412, 420, 431, 439, 451, 467, 482, 492, 499, 503, 506, 513, 518, 529, 536, 539, 545, 550, 559, 566, 574, 577, 583, 596, 601, 603, 610, 617, 623, 628, 634, 640, 646, 649, 665, 674, 677, 686, 701, 714, 720, 734, 741, 744, 749, 752, 755, 767, 770, 774, 778, 782, 787, 792, 797, 802, 813, 819, 822, 827, 836, 840, 845, 850, 856, 863, 868, 871, 877, 880, 890, 898, 902, 911, 915, 927, 930, 940, 943, 950, 958, 961, 968, 971, 976, 982, 990, 996, 1002, 1010, 1015, 1022, 1029, 1037, 1044, 1049, 1054, 1061, 1065, 1067, 1071, 1074, 1079, 1084, 1089, 1093, 1097, 1101, 1107, 1110, 1113, 1116, 1122}
func (i FeatureID) String() string {
if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
return "FeatureID(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _FeatureID_name[_FeatureID_index[i]:_FeatureID_index[i+1]]
}
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[VendorUnknown-0]
_ = x[Intel-1]
_ = x[AMD-2]
_ = x[VIA-3]
_ = x[Transmeta-4]
_ = x[NSC-5]
_ = x[KVM-6]
_ = x[MSVM-7]
_ = x[VMware-8]
_ = x[XenHVM-9]
_ = x[Bhyve-10]
_ = x[Hygon-11]
_ = x[SiS-12]
_ = x[RDC-13]
_ = x[Ampere-14]
_ = x[ARM-15]
_ = x[Broadcom-16]
_ = x[Cavium-17]
_ = x[DEC-18]
_ = x[Fujitsu-19]
_ = x[Infineon-20]
_ = x[Motorola-21]
_ = x[NVIDIA-22]
_ = x[AMCC-23]
_ = x[Qualcomm-24]
_ = x[Marvell-25]
_ = x[lastVendor-26]
}
const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvelllastVendor"
var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 155}
func (i Vendor) String() string {
if i < 0 || i >= Vendor(len(_Vendor_index)-1) {
return "Vendor(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _Vendor_name[_Vendor_index[i]:_Vendor_index[i+1]]
}

121
vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go generated vendored Normal file
View File

@ -0,0 +1,121 @@
// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
package cpuid
import (
"runtime"
"strings"
"golang.org/x/sys/unix"
)
// detectOS fills c from operating-system sources on darwin/arm64.
// It always returns true.
func detectOS(c *CPUInfo) bool {
	notIOS := runtime.GOOS != "ios"
	if notIOS {
		// The sysctl queries are skipped when running on iOS.
		tryToFillCPUInfoFomSysctl(c)
	}

	// There are no hw.optional sysctl values for the below features on Mac OS 11.0
	// to detect their supported state dynamically. Assume the CPU features that
	// Apple Silicon M1 supports to be available as a minimal set of features
	// to all Go programs running on darwin/arm64.
	// TODO: Add more if we know them.
	c.featureSet.setIf(notIOS, AESARM, PMULL, SHA1, SHA2)

	return true
}
// sysctlGetBool reads the named sysctl as a uint32 and reports whether it is
// non-zero. A failed lookup is reported as false.
func sysctlGetBool(name string) bool {
	v, err := unix.SysctlUint32(name)
	return err == nil && v != 0
}
// sysctlGetString returns the string value of the named sysctl, or the empty
// string when the lookup fails.
func sysctlGetString(name string) string {
	if s, err := unix.Sysctl(name); err == nil {
		return s
	}
	return ""
}
// sysctlGetInt tries each sysctl name in order, reading it as a uint32, and
// returns the first value that was read successfully and is non-zero.
// If no name yields such a value, unknown is returned.
func sysctlGetInt(unknown int, names ...string) int {
	for i := range names {
		if v, err := unix.SysctlUint32(names[i]); err == nil && v != 0 {
			return int(v)
		}
	}
	return unknown
}
// sysctlGetInt64 tries each sysctl name in order, reading it as a uint64, and
// returns the first successfully read value that, converted to int, differs
// from unknown. If no name yields such a value, unknown is returned.
func sysctlGetInt64(unknown int, names ...string) int {
	for i := range names {
		raw, err := unix.SysctlUint64(names[i])
		if err != nil {
			continue
		}
		if n := int(raw); n != unknown {
			return n
		}
	}
	return unknown
}
// setFeature passes the boolean state of the named sysctl, together with
// feature, to c.featureSet.setIf.
func setFeature(c *CPUInfo, name string, feature FeatureID) {
	enabled := sysctlGetBool(name)
	c.featureSet.setIf(enabled, feature)
}
// tryToFillCPUInfoFomSysctl populates c from sysctl values: brand and vendor
// strings, core/thread counts, family/model, cache-line and cache sizes, and
// the ARM feature flags exposed under hw.optional.
//
// Lookups that fail fall back to the defaults passed to the sysctlGet*
// helpers; a feature whose sysctl is missing is simply not set.
func tryToFillCPUInfoFomSysctl(c *CPUInfo) {
	c.BrandName = sysctlGetString("machdep.cpu.brand_string")

	// Use the first word of the brand string as the vendor. Guard on the
	// split result rather than the raw length so a whitespace-only brand
	// string cannot cause an out-of-range index.
	if fields := strings.Fields(c.BrandName); len(fields) > 0 {
		c.VendorString = fields[0]
	}

	c.PhysicalCores = sysctlGetInt(runtime.NumCPU(), "hw.physicalcpu")
	// sysctlGetInt never returns 0 when its default is 1, so the divisor is
	// always non-zero.
	c.ThreadsPerCore = sysctlGetInt(1, "machdep.cpu.thread_count", "kern.num_threads") /
		sysctlGetInt(1, "hw.physicalcpu")
	c.LogicalCores = sysctlGetInt(runtime.NumCPU(), "machdep.cpu.core_count")
	c.Family = sysctlGetInt(0, "machdep.cpu.family", "hw.cpufamily")
	c.Model = sysctlGetInt(0, "machdep.cpu.model")
	c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize")
	c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize")
	// The data cache has its own key; reading hw.l1icachesize here would
	// report the instruction cache size twice.
	c.Cache.L1D = sysctlGetInt64(-1, "hw.l1dcachesize")
	c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize")
	c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize")

	// from https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile
	setFeature(c, "hw.optional.arm.FEAT_AES", AESARM)
	setFeature(c, "hw.optional.AdvSIMD", ASIMD)
	setFeature(c, "hw.optional.arm.FEAT_DotProd", ASIMDDP)
	setFeature(c, "hw.optional.arm.FEAT_RDM", ASIMDRDM)
	// FEAT_* keys live under hw.optional.arm like the others in this list.
	setFeature(c, "hw.optional.arm.FEAT_CRC32", CRC32)
	setFeature(c, "hw.optional.arm.FEAT_DPB", DCPOP)
	// setFeature(c, "", EVTSTRM)
	setFeature(c, "hw.optional.arm.FEAT_FCMA", FCMA)
	setFeature(c, "hw.optional.arm.FEAT_FP", FP)
	setFeature(c, "hw.optional.arm.FEAT_FP16", FPHP)
	setFeature(c, "hw.optional.arm.FEAT_PAuth", GPA)
	setFeature(c, "hw.optional.arm.FEAT_JSCVT", JSCVT)
	setFeature(c, "hw.optional.arm.FEAT_LRCPC", LRCPC)
	setFeature(c, "hw.optional.arm.FEAT_PMULL", PMULL)
	setFeature(c, "hw.optional.arm.FEAT_SHA1", SHA1)
	setFeature(c, "hw.optional.arm.FEAT_SHA256", SHA2)
	setFeature(c, "hw.optional.arm.FEAT_SHA3", SHA3)
	setFeature(c, "hw.optional.arm.FEAT_SHA512", SHA512)
	// setFeature(c, "", SM3)
	// setFeature(c, "", SM4)
	setFeature(c, "hw.optional.arm.FEAT_SVE", SVE)

	// from empirical observation
	setFeature(c, "hw.optional.AdvSIMD_HPFPCvt", ASIMDHP)
	setFeature(c, "hw.optional.armv8_1_atomics", ATOMICS)
	setFeature(c, "hw.optional.floatingpoint", FP)
	setFeature(c, "hw.optional.armv8_2_sha3", SHA3)
	setFeature(c, "hw.optional.armv8_2_sha512", SHA512)
	setFeature(c, "hw.optional.armv8_3_compnum", FCMA)
	setFeature(c, "hw.optional.armv8_crc32", CRC32)
}

130
vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go generated vendored Normal file
View File

@ -0,0 +1,130 @@
// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file located
// here https://github.com/golang/sys/blob/master/LICENSE
package cpuid
import (
"encoding/binary"
"io/ioutil"
"runtime"
)
// HWCAP bits.
// Each constant is a single-bit mask that detectOS tests against hwcap to
// decide which feature flag to set. The underscore names are kept as-is from
// the golang/sys code this file is derived from.
const (
hwcap_FP = 1 << 0
hwcap_ASIMD = 1 << 1
hwcap_EVTSTRM = 1 << 2
hwcap_AES = 1 << 3
hwcap_PMULL = 1 << 4
hwcap_SHA1 = 1 << 5
hwcap_SHA2 = 1 << 6
hwcap_CRC32 = 1 << 7
hwcap_ATOMICS = 1 << 8
hwcap_FPHP = 1 << 9
hwcap_ASIMDHP = 1 << 10
hwcap_CPUID = 1 << 11
hwcap_ASIMDRDM = 1 << 12
hwcap_JSCVT = 1 << 13
hwcap_FCMA = 1 << 14
hwcap_LRCPC = 1 << 15
hwcap_DCPOP = 1 << 16
hwcap_SHA3 = 1 << 17
hwcap_SM3 = 1 << 18
hwcap_SM4 = 1 << 19
hwcap_ASIMDDP = 1 << 20
hwcap_SHA512 = 1 << 21
hwcap_SVE = 1 << 22
// hwcap_ASIMDFHM is declared but not consulted by detectOS in this file.
hwcap_ASIMDFHM = 1 << 23
)
// detectOS fills c from operating-system sources on linux/arm64.
//
// Core counts are taken from runtime.NumCPU with one thread per core. Feature
// flags come from the hwcap word; when hwcap is still zero it is first read
// from the AT_HWCAP entry of /proc/self/auxv. The function returns false when
// no hwcap value could be obtained and true otherwise.
func detectOS(c *CPUInfo) bool {
	// For now assuming no hyperthreading is reasonable.
	cores := runtime.NumCPU()
	c.LogicalCores = cores
	c.PhysicalCores = cores
	c.ThreadsPerCore = 1

	if hwcap == 0 {
		// We did not get values from the runtime.
		// Try reading /proc/self/auxv
		// From https://github.com/golang/sys
		const (
			_AT_HWCAP  = 16
			_AT_HWCAP2 = 26 // Not used

			uintSize  = int(32 << (^uint(0) >> 63))
			entrySize = 2 * (uintSize / 8) // one tag word plus one value word
		)

		auxv, err := ioutil.ReadFile("/proc/self/auxv")
		if err != nil {
			// e.g. on android /proc/self/auxv is not accessible, so
			// silently ignore the error and report failure.
			return false
		}

		le := binary.LittleEndian
		// Walk the vector one (tag, value) pair at a time.
		for ; len(auxv) >= entrySize; auxv = auxv[entrySize:] {
			var tag, val uint
			if uintSize == 32 {
				tag, val = uint(le.Uint32(auxv[0:])), uint(le.Uint32(auxv[4:]))
			} else {
				tag, val = uint(le.Uint64(auxv[0:])), uint(le.Uint64(auxv[8:]))
			}
			if tag == _AT_HWCAP {
				hwcap = val
			}
		}

		if hwcap == 0 {
			return false
		}
	}

	// HWCap was populated by the runtime from the auxiliary vector.
	// Use HWCap information since reading aarch64 system registers
	// is not supported in user space on older linux kernels.
	capabilities := [...]struct {
		mask uint
		id   FeatureID
	}{
		{hwcap_AES, AESARM},
		{hwcap_ASIMD, ASIMD},
		{hwcap_ASIMDDP, ASIMDDP},
		{hwcap_ASIMDHP, ASIMDHP},
		{hwcap_ASIMDRDM, ASIMDRDM},
		{hwcap_CPUID, ARMCPUID},
		{hwcap_CRC32, CRC32},
		{hwcap_DCPOP, DCPOP},
		{hwcap_EVTSTRM, EVTSTRM},
		{hwcap_FCMA, FCMA},
		{hwcap_FP, FP},
		{hwcap_FPHP, FPHP},
		{hwcap_JSCVT, JSCVT},
		{hwcap_LRCPC, LRCPC},
		{hwcap_PMULL, PMULL},
		{hwcap_SHA1, SHA1},
		{hwcap_SHA2, SHA2},
		{hwcap_SHA3, SHA3},
		{hwcap_SHA512, SHA512},
		{hwcap_SM3, SM3},
		{hwcap_SM4, SM4},
		{hwcap_SVE, SVE},
	}
	for _, entry := range capabilities {
		c.featureSet.setIf(isSet(hwcap, entry.mask), entry.id)
	}

	// The Samsung S9+ kernel reports support for atomics, but not all cores
	// actually support them, resulting in SIGILL. See issue #28431.
	// TODO(elias.naur): Only disable the optimization on bad chipsets on android.
	atomicsUsable := isSet(hwcap, hwcap_ATOMICS) && runtime.GOOS != "android"
	c.featureSet.setIf(atomicsUsable, ATOMICS)

	return true
}
// isSet reports whether hwc and value have at least one bit in common.
func isSet(hwc uint, value uint) bool {
	common := hwc & value
	return common > 0
}

16
vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go generated vendored Normal file
View File

@ -0,0 +1,16 @@
// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
//go:build arm64 && !linux && !darwin
// +build arm64,!linux,!darwin
package cpuid
import "runtime"
func detectOS(c *CPUInfo) bool {
c.PhysicalCores = runtime.NumCPU()
// For now assuming 1 thread per core...
c.ThreadsPerCore = 1
c.LogicalCores = c.PhysicalCores
return false
}

View File

@ -0,0 +1,8 @@
// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
//go:build nounsafe
// +build nounsafe
package cpuid
// hwcap is the plain, unlinked variant used when the nounsafe build tag is
// set. It starts at zero, which makes the linux/arm64 detectOS read
// /proc/self/auxv to obtain the capability bits.
var hwcap uint

View File

@ -0,0 +1,11 @@
// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
//go:build !nounsafe
// +build !nounsafe
package cpuid
import _ "unsafe" // needed for go:linkname
// hwcap is bound to internal/cpu.HWCap through go:linkname, so it shares the
// capability word held by that package instead of starting at zero.
//go:linkname hwcap internal/cpu.HWCap
var hwcap uint

View File

@ -0,0 +1,15 @@
#!/bin/sh
# Build-checks the package in the current directory for every GOOS/GOARCH pair
# printed by `go tool dist list`, once for each combination of the noasm and
# appengine build tags. Build output is discarded.

# Stop at the first build that fails.
set -e

# Each line of `go tool dist list` is "os/arch"; IFS=/ splits it into the two
# variables. -r keeps read from interpreting backslashes in its input.
go tool dist list | while IFS=/ read -r os arch; do
    echo "Checking $os/$arch..."
    echo " normal"
    GOARCH="$arch" GOOS="$os" go build -o /dev/null .
    echo " noasm"
    GOARCH="$arch" GOOS="$os" go build -tags noasm -o /dev/null .
    echo " appengine"
    GOARCH="$arch" GOOS="$os" go build -tags appengine -o /dev/null .
    echo " noasm,appengine"
    GOARCH="$arch" GOOS="$os" go build -tags 'appengine noasm' -o /dev/null .
done