[experiment] add alternative wasm sqlite3 implementation available via build-tag (#2863)

This allows building GoToSocial with [SQLite transpiled to WASM](https://github.com/ncruces/go-sqlite3), accessed through [Wazero](https://wazero.io/).
This commit is contained in:
kim
2024-05-27 15:46:15 +00:00
committed by GitHub
parent cce21c11cb
commit 1e7b32490d
398 changed files with 86174 additions and 684 deletions


@@ -0,0 +1,164 @@
package descriptor
import "math/bits"
// Table is a data structure mapping 32-bit descriptors to items.
//
// # Negative keys are invalid.
//
// Negative keys (e.g. -1) are invalid inputs and will return a corresponding
// not-found value. This matches POSIX behavior of file descriptors.
// See https://pubs.opengroup.org/onlinepubs/9699919799/functions/dirfd.html#tag_16_90
//
// # Data structure design
//
// The data structure optimizes for memory density and lookup performance,
// trading off compute at insertion time. This is a useful compromise for the
// use cases we employ it with: items are usually accessed a lot more often
// than they are inserted, each operation requires a table lookup, so we are
// better off spending extra compute to insert items in the table in order to
// get cheaper lookups. Memory efficiency is also crucial to support scaling
// with programs that maintain thousands of items: having a high or non-linear
// memory-to-item ratio could otherwise be used as an attack vector by
// malicious applications attempting to damage performance of the host.
type Table[Key ~int32, Item any] struct {
masks []uint64
items []Item
}
// Len returns the number of items stored in the table.
func (t *Table[Key, Item]) Len() (n int) {
// We could make this a O(1) operation if we cached the number of items in
// the table. More state usually means more problems, so until we have a
// clear need for this, the simple implementation may be a better trade off.
for _, mask := range t.masks {
n += bits.OnesCount64(mask)
}
return n
}
// grow ensures that t has enough room for n items, potentially reallocating the
// internal buffers if their capacity was too small to hold this many items.
func (t *Table[Key, Item]) grow(n int) {
// Round up to a multiple of 64 since this is the smallest increment due to
// using 64 bits masks.
n = (n*64 + 63) / 64
if n > len(t.masks) {
masks := make([]uint64, n)
copy(masks, t.masks)
items := make([]Item, n*64)
copy(items, t.items)
t.masks = masks
t.items = items
}
}
// Insert inserts the given item into the table, returning the key that it is
// mapped to, or false if the table was full.
//
// The method does not perform deduplication; it is possible for the same item
// to be inserted multiple times, and each insertion will return a different key.
func (t *Table[Key, Item]) Insert(item Item) (key Key, ok bool) {
offset := 0
insert:
// Note: this loop could be made a lot more efficient using vectorized
// operations: 256 bits vector registers would yield a theoretical 4x
// speed up (e.g. using AVX2).
for index, mask := range t.masks[offset:] {
if ^mask != 0 { // not full?
shift := bits.TrailingZeros64(^mask)
index += offset
key = Key(index)*64 + Key(shift)
t.items[key] = item
t.masks[index] = mask | uint64(1<<shift)
return key, key >= 0
}
}
offset = len(t.masks)
n := 2 * len(t.masks)
if n == 0 {
n = 1
}
t.grow(n)
goto insert
}
// Lookup returns the item associated with the given key (may be nil).
func (t *Table[Key, Item]) Lookup(key Key) (item Item, found bool) {
if key < 0 { // invalid key
return
}
if i := int(key); i >= 0 && i < len(t.items) {
index := uint(key) / 64
shift := uint(key) % 64
if (t.masks[index] & (1 << shift)) != 0 {
item, found = t.items[i], true
}
}
return
}
// InsertAt inserts the given `item` at the item descriptor `key`. This returns
// false if the insert was impossible due to negative key.
func (t *Table[Key, Item]) InsertAt(item Item, key Key) bool {
if key < 0 {
return false
}
if diff := int(key) - t.Len(); diff > 0 {
t.grow(diff)
}
index := uint(key) / 64
shift := uint(key) % 64
t.masks[index] |= 1 << shift
t.items[key] = item
return true
}
// Delete deletes the item stored at the given key from the table.
func (t *Table[Key, Item]) Delete(key Key) {
if key < 0 { // invalid key
return
}
if index, shift := key/64, key%64; int(index) < len(t.masks) {
mask := t.masks[index]
if (mask & (1 << shift)) != 0 {
var zero Item
t.items[key] = zero
t.masks[index] = mask & ^uint64(1<<shift)
}
}
}
// Range calls f for each item and its associated key in the table. The function
// f might return false to interrupt the iteration.
func (t *Table[Key, Item]) Range(f func(Key, Item) bool) {
for i, mask := range t.masks {
if mask == 0 {
continue
}
for j := Key(0); j < 64; j++ {
if (mask & (1 << j)) == 0 {
continue
}
if key := Key(i)*64 + j; !f(key, t.items[key]) {
return
}
}
}
}
// Reset clears the content of the table.
func (t *Table[Key, Item]) Reset() {
for i := range t.masks {
t.masks[i] = 0
}
var zero Item
for i := range t.items {
t.items[i] = zero
}
}
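The Table above packs one presence bit per slot into 64-bit masks, so Lookup is a mask test plus a slice index, and Insert scans for the first clear bit. A minimal usage sketch, written as if it sat in a test file of the same descriptor package (the ExampleTable name, the fmt import, and the concrete int32/string type arguments are illustrative, not part of this diff):
func ExampleTable() {
	var files Table[int32, string]
	stdin, _ := files.Insert("stdin")   // key 0: first clear bit in the first mask
	stdout, _ := files.Insert("stdout") // key 1
	files.InsertAt("stderr", 2)         // explicit key; grows the table if needed
	if name, ok := files.Lookup(stdout); ok {
		fmt.Println(stdout, name)
	}
	files.Delete(stdin)
	_, ok := files.Lookup(stdin) // false: the presence bit was cleared
	fmt.Println(ok)
	// Output:
	// 1 stdout
	// false
}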

File diff suppressed because it is too large


@@ -0,0 +1,22 @@
package interpreter
import (
"bytes"
)
func format(ops []unionOperation) string {
buf := bytes.NewBuffer(nil)
_, _ = buf.WriteString(".entrypoint\n")
for i := range ops {
op := &ops[i]
str := op.String()
isLabel := op.Kind == operationKindLabel
if !isLabel {
const indent = "\t"
str = indent + str
}
_, _ = buf.WriteString(str + "\n")
}
return buf.String()
}

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,767 @@
package interpreter
import (
"fmt"
"github.com/tetratelabs/wazero/internal/wasm"
)
// signature represents how a Wasm opcode
// manipulates the value stacks in terms of value types.
type signature struct {
in, out []unsignedType
}
var (
signature_None_None = &signature{}
signature_Unknown_None = &signature{
in: []unsignedType{unsignedTypeUnknown},
}
signature_None_I32 = &signature{
out: []unsignedType{unsignedTypeI32},
}
signature_None_I64 = &signature{
out: []unsignedType{unsignedTypeI64},
}
signature_None_V128 = &signature{
out: []unsignedType{unsignedTypeV128},
}
signature_None_F32 = &signature{
out: []unsignedType{unsignedTypeF32},
}
signature_None_F64 = &signature{
out: []unsignedType{unsignedTypeF64},
}
signature_I32_None = &signature{
in: []unsignedType{unsignedTypeI32},
}
signature_I64_None = &signature{
in: []unsignedType{unsignedTypeI64},
}
signature_F32_None = &signature{
in: []unsignedType{unsignedTypeF32},
}
signature_F64_None = &signature{
in: []unsignedType{unsignedTypeF64},
}
signature_V128_None = &signature{
in: []unsignedType{unsignedTypeV128},
}
signature_I32_I32 = &signature{
in: []unsignedType{unsignedTypeI32},
out: []unsignedType{unsignedTypeI32},
}
signature_I32_I64 = &signature{
in: []unsignedType{unsignedTypeI32},
out: []unsignedType{unsignedTypeI64},
}
signature_I64_I64 = &signature{
in: []unsignedType{unsignedTypeI64},
out: []unsignedType{unsignedTypeI64},
}
signature_I32_F32 = &signature{
in: []unsignedType{unsignedTypeI32},
out: []unsignedType{unsignedTypeF32},
}
signature_I32_F64 = &signature{
in: []unsignedType{unsignedTypeI32},
out: []unsignedType{unsignedTypeF64},
}
signature_I64_I32 = &signature{
in: []unsignedType{unsignedTypeI64},
out: []unsignedType{unsignedTypeI32},
}
signature_I64_F32 = &signature{
in: []unsignedType{unsignedTypeI64},
out: []unsignedType{unsignedTypeF32},
}
signature_I64_F64 = &signature{
in: []unsignedType{unsignedTypeI64},
out: []unsignedType{unsignedTypeF64},
}
signature_F32_I32 = &signature{
in: []unsignedType{unsignedTypeF32},
out: []unsignedType{unsignedTypeI32},
}
signature_F32_I64 = &signature{
in: []unsignedType{unsignedTypeF32},
out: []unsignedType{unsignedTypeI64},
}
signature_F32_F64 = &signature{
in: []unsignedType{unsignedTypeF32},
out: []unsignedType{unsignedTypeF64},
}
signature_F32_F32 = &signature{
in: []unsignedType{unsignedTypeF32},
out: []unsignedType{unsignedTypeF32},
}
signature_F64_I32 = &signature{
in: []unsignedType{unsignedTypeF64},
out: []unsignedType{unsignedTypeI32},
}
signature_F64_F32 = &signature{
in: []unsignedType{unsignedTypeF64},
out: []unsignedType{unsignedTypeF32},
}
signature_F64_I64 = &signature{
in: []unsignedType{unsignedTypeF64},
out: []unsignedType{unsignedTypeI64},
}
signature_F64_F64 = &signature{
in: []unsignedType{unsignedTypeF64},
out: []unsignedType{unsignedTypeF64},
}
signature_I32I32_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI32},
}
signature_I32I32_I32 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI32},
out: []unsignedType{unsignedTypeI32},
}
signature_I32I64_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI64},
}
signature_I32F32_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeF32},
}
signature_I32F64_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeF64},
}
signature_I64I32_I32 = &signature{
in: []unsignedType{unsignedTypeI64, unsignedTypeI32},
out: []unsignedType{unsignedTypeI32},
}
signature_I64I64_I32 = &signature{
in: []unsignedType{unsignedTypeI64, unsignedTypeI64},
out: []unsignedType{unsignedTypeI32},
}
signature_I64I64_I64 = &signature{
in: []unsignedType{unsignedTypeI64, unsignedTypeI64},
out: []unsignedType{unsignedTypeI64},
}
signature_F32F32_I32 = &signature{
in: []unsignedType{unsignedTypeF32, unsignedTypeF32},
out: []unsignedType{unsignedTypeI32},
}
signature_F32F32_F32 = &signature{
in: []unsignedType{unsignedTypeF32, unsignedTypeF32},
out: []unsignedType{unsignedTypeF32},
}
signature_F64F64_I32 = &signature{
in: []unsignedType{unsignedTypeF64, unsignedTypeF64},
out: []unsignedType{unsignedTypeI32},
}
signature_F64F64_F64 = &signature{
in: []unsignedType{unsignedTypeF64, unsignedTypeF64},
out: []unsignedType{unsignedTypeF64},
}
signature_I32I32I32_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI32, unsignedTypeI32},
}
signature_I32I64I32_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI64, unsignedTypeI32},
}
signature_UnknownUnknownI32_Unknown = &signature{
in: []unsignedType{unsignedTypeUnknown, unsignedTypeUnknown, unsignedTypeI32},
out: []unsignedType{unsignedTypeUnknown},
}
signature_V128V128_V128 = &signature{
in: []unsignedType{unsignedTypeV128, unsignedTypeV128},
out: []unsignedType{unsignedTypeV128},
}
signature_V128V128V128_V32 = &signature{
in: []unsignedType{unsignedTypeV128, unsignedTypeV128, unsignedTypeV128},
out: []unsignedType{unsignedTypeV128},
}
signature_I32_V128 = &signature{
in: []unsignedType{unsignedTypeI32},
out: []unsignedType{unsignedTypeV128},
}
signature_I32V128_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeV128},
}
signature_I32V128_V128 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeV128},
out: []unsignedType{unsignedTypeV128},
}
signature_V128I32_V128 = &signature{
in: []unsignedType{unsignedTypeV128, unsignedTypeI32},
out: []unsignedType{unsignedTypeV128},
}
signature_V128I64_V128 = &signature{
in: []unsignedType{unsignedTypeV128, unsignedTypeI64},
out: []unsignedType{unsignedTypeV128},
}
signature_V128F32_V128 = &signature{
in: []unsignedType{unsignedTypeV128, unsignedTypeF32},
out: []unsignedType{unsignedTypeV128},
}
signature_V128F64_V128 = &signature{
in: []unsignedType{unsignedTypeV128, unsignedTypeF64},
out: []unsignedType{unsignedTypeV128},
}
signature_V128_I32 = &signature{
in: []unsignedType{unsignedTypeV128},
out: []unsignedType{unsignedTypeI32},
}
signature_V128_I64 = &signature{
in: []unsignedType{unsignedTypeV128},
out: []unsignedType{unsignedTypeI64},
}
signature_V128_F32 = &signature{
in: []unsignedType{unsignedTypeV128},
out: []unsignedType{unsignedTypeF32},
}
signature_V128_F64 = &signature{
in: []unsignedType{unsignedTypeV128},
out: []unsignedType{unsignedTypeF64},
}
signature_V128_V128 = &signature{
in: []unsignedType{unsignedTypeV128},
out: []unsignedType{unsignedTypeV128},
}
signature_I64_V128 = &signature{
in: []unsignedType{unsignedTypeI64},
out: []unsignedType{unsignedTypeV128},
}
signature_F32_V128 = &signature{
in: []unsignedType{unsignedTypeF32},
out: []unsignedType{unsignedTypeV128},
}
signature_F64_V128 = &signature{
in: []unsignedType{unsignedTypeF64},
out: []unsignedType{unsignedTypeV128},
}
signature_I32I64_I64 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI64},
out: []unsignedType{unsignedTypeI64},
}
signature_I32I32I64_I32 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI32, unsignedTypeI64},
out: []unsignedType{unsignedTypeI32},
}
signature_I32I64I64_I32 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI64, unsignedTypeI64},
out: []unsignedType{unsignedTypeI32},
}
signature_I32I32I32_I32 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI32, unsignedTypeI32},
out: []unsignedType{unsignedTypeI32},
}
signature_I32I64I64_I64 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI64, unsignedTypeI64},
out: []unsignedType{unsignedTypeI64},
}
)
// wasmOpcodeSignature returns the signature of given Wasm opcode.
// Note that some opcodes' signatures vary depending on
// the function instance (for example, local types).
// The "index" parameter is unused by most opcodes.
// The returned signature is used for stack validation when lowering Wasm's opcodes to interpreterir.
func (c *compiler) wasmOpcodeSignature(op wasm.Opcode, index uint32) (*signature, error) {
switch op {
case wasm.OpcodeUnreachable, wasm.OpcodeNop, wasm.OpcodeBlock, wasm.OpcodeLoop:
return signature_None_None, nil
case wasm.OpcodeIf:
return signature_I32_None, nil
case wasm.OpcodeElse, wasm.OpcodeEnd, wasm.OpcodeBr:
return signature_None_None, nil
case wasm.OpcodeBrIf, wasm.OpcodeBrTable:
return signature_I32_None, nil
case wasm.OpcodeReturn:
return signature_None_None, nil
case wasm.OpcodeCall:
return c.funcTypeToSigs.get(c.funcs[index], false /* direct */), nil
case wasm.OpcodeCallIndirect:
return c.funcTypeToSigs.get(index, true /* call_indirect */), nil
case wasm.OpcodeDrop:
return signature_Unknown_None, nil
case wasm.OpcodeSelect, wasm.OpcodeTypedSelect:
return signature_UnknownUnknownI32_Unknown, nil
case wasm.OpcodeLocalGet:
inputLen := uint32(len(c.sig.Params))
if l := uint32(len(c.localTypes)) + inputLen; index >= l {
return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l)
}
var t wasm.ValueType
if index < inputLen {
t = c.sig.Params[index]
} else {
t = c.localTypes[index-inputLen]
}
return wasmValueTypeToUnsignedOutSignature(t), nil
case wasm.OpcodeLocalSet:
inputLen := uint32(len(c.sig.Params))
if l := uint32(len(c.localTypes)) + inputLen; index >= l {
return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l)
}
var t wasm.ValueType
if index < inputLen {
t = c.sig.Params[index]
} else {
t = c.localTypes[index-inputLen]
}
return wasmValueTypeToUnsignedInSignature(t), nil
case wasm.OpcodeLocalTee:
inputLen := uint32(len(c.sig.Params))
if l := uint32(len(c.localTypes)) + inputLen; index >= l {
return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l)
}
var t wasm.ValueType
if index < inputLen {
t = c.sig.Params[index]
} else {
t = c.localTypes[index-inputLen]
}
return wasmValueTypeToUnsignedInOutSignature(t), nil
case wasm.OpcodeGlobalGet:
if len(c.globals) <= int(index) {
return nil, fmt.Errorf("invalid global index for global.get %d >= %d", index, len(c.globals))
}
return wasmValueTypeToUnsignedOutSignature(c.globals[index].ValType), nil
case wasm.OpcodeGlobalSet:
if len(c.globals) <= int(index) {
return nil, fmt.Errorf("invalid global index for global.get %d >= %d", index, len(c.globals))
}
return wasmValueTypeToUnsignedInSignature(c.globals[index].ValType), nil
case wasm.OpcodeI32Load:
return signature_I32_I32, nil
case wasm.OpcodeI64Load:
return signature_I32_I64, nil
case wasm.OpcodeF32Load:
return signature_I32_F32, nil
case wasm.OpcodeF64Load:
return signature_I32_F64, nil
case wasm.OpcodeI32Load8S, wasm.OpcodeI32Load8U, wasm.OpcodeI32Load16S, wasm.OpcodeI32Load16U:
return signature_I32_I32, nil
case wasm.OpcodeI64Load8S, wasm.OpcodeI64Load8U, wasm.OpcodeI64Load16S, wasm.OpcodeI64Load16U,
wasm.OpcodeI64Load32S, wasm.OpcodeI64Load32U:
return signature_I32_I64, nil
case wasm.OpcodeI32Store:
return signature_I32I32_None, nil
case wasm.OpcodeI64Store:
return signature_I32I64_None, nil
case wasm.OpcodeF32Store:
return signature_I32F32_None, nil
case wasm.OpcodeF64Store:
return signature_I32F64_None, nil
case wasm.OpcodeI32Store8:
return signature_I32I32_None, nil
case wasm.OpcodeI32Store16:
return signature_I32I32_None, nil
case wasm.OpcodeI64Store8:
return signature_I32I64_None, nil
case wasm.OpcodeI64Store16:
return signature_I32I64_None, nil
case wasm.OpcodeI64Store32:
return signature_I32I64_None, nil
case wasm.OpcodeMemorySize:
return signature_None_I32, nil
case wasm.OpcodeMemoryGrow:
return signature_I32_I32, nil
case wasm.OpcodeI32Const:
return signature_None_I32, nil
case wasm.OpcodeI64Const:
return signature_None_I64, nil
case wasm.OpcodeF32Const:
return signature_None_F32, nil
case wasm.OpcodeF64Const:
return signature_None_F64, nil
case wasm.OpcodeI32Eqz:
return signature_I32_I32, nil
case wasm.OpcodeI32Eq, wasm.OpcodeI32Ne, wasm.OpcodeI32LtS,
wasm.OpcodeI32LtU, wasm.OpcodeI32GtS, wasm.OpcodeI32GtU,
wasm.OpcodeI32LeS, wasm.OpcodeI32LeU, wasm.OpcodeI32GeS,
wasm.OpcodeI32GeU:
return signature_I32I32_I32, nil
case wasm.OpcodeI64Eqz:
return signature_I64_I32, nil
case wasm.OpcodeI64Eq, wasm.OpcodeI64Ne, wasm.OpcodeI64LtS,
wasm.OpcodeI64LtU, wasm.OpcodeI64GtS, wasm.OpcodeI64GtU,
wasm.OpcodeI64LeS, wasm.OpcodeI64LeU, wasm.OpcodeI64GeS,
wasm.OpcodeI64GeU:
return signature_I64I64_I32, nil
case wasm.OpcodeF32Eq, wasm.OpcodeF32Ne, wasm.OpcodeF32Lt,
wasm.OpcodeF32Gt, wasm.OpcodeF32Le, wasm.OpcodeF32Ge:
return signature_F32F32_I32, nil
case wasm.OpcodeF64Eq, wasm.OpcodeF64Ne, wasm.OpcodeF64Lt,
wasm.OpcodeF64Gt, wasm.OpcodeF64Le, wasm.OpcodeF64Ge:
return signature_F64F64_I32, nil
case wasm.OpcodeI32Clz, wasm.OpcodeI32Ctz, wasm.OpcodeI32Popcnt:
return signature_I32_I32, nil
case wasm.OpcodeI32Add, wasm.OpcodeI32Sub, wasm.OpcodeI32Mul,
wasm.OpcodeI32DivS, wasm.OpcodeI32DivU, wasm.OpcodeI32RemS,
wasm.OpcodeI32RemU, wasm.OpcodeI32And, wasm.OpcodeI32Or,
wasm.OpcodeI32Xor, wasm.OpcodeI32Shl, wasm.OpcodeI32ShrS,
wasm.OpcodeI32ShrU, wasm.OpcodeI32Rotl, wasm.OpcodeI32Rotr:
return signature_I32I32_I32, nil
case wasm.OpcodeI64Clz, wasm.OpcodeI64Ctz, wasm.OpcodeI64Popcnt:
return signature_I64_I64, nil
case wasm.OpcodeI64Add, wasm.OpcodeI64Sub, wasm.OpcodeI64Mul,
wasm.OpcodeI64DivS, wasm.OpcodeI64DivU, wasm.OpcodeI64RemS,
wasm.OpcodeI64RemU, wasm.OpcodeI64And, wasm.OpcodeI64Or,
wasm.OpcodeI64Xor, wasm.OpcodeI64Shl, wasm.OpcodeI64ShrS,
wasm.OpcodeI64ShrU, wasm.OpcodeI64Rotl, wasm.OpcodeI64Rotr:
return signature_I64I64_I64, nil
case wasm.OpcodeF32Abs, wasm.OpcodeF32Neg, wasm.OpcodeF32Ceil,
wasm.OpcodeF32Floor, wasm.OpcodeF32Trunc, wasm.OpcodeF32Nearest,
wasm.OpcodeF32Sqrt:
return signature_F32_F32, nil
case wasm.OpcodeF32Add, wasm.OpcodeF32Sub, wasm.OpcodeF32Mul,
wasm.OpcodeF32Div, wasm.OpcodeF32Min, wasm.OpcodeF32Max,
wasm.OpcodeF32Copysign:
return signature_F32F32_F32, nil
case wasm.OpcodeF64Abs, wasm.OpcodeF64Neg, wasm.OpcodeF64Ceil,
wasm.OpcodeF64Floor, wasm.OpcodeF64Trunc, wasm.OpcodeF64Nearest,
wasm.OpcodeF64Sqrt:
return signature_F64_F64, nil
case wasm.OpcodeF64Add, wasm.OpcodeF64Sub, wasm.OpcodeF64Mul,
wasm.OpcodeF64Div, wasm.OpcodeF64Min, wasm.OpcodeF64Max,
wasm.OpcodeF64Copysign:
return signature_F64F64_F64, nil
case wasm.OpcodeI32WrapI64:
return signature_I64_I32, nil
case wasm.OpcodeI32TruncF32S, wasm.OpcodeI32TruncF32U:
return signature_F32_I32, nil
case wasm.OpcodeI32TruncF64S, wasm.OpcodeI32TruncF64U:
return signature_F64_I32, nil
case wasm.OpcodeI64ExtendI32S, wasm.OpcodeI64ExtendI32U:
return signature_I32_I64, nil
case wasm.OpcodeI64TruncF32S, wasm.OpcodeI64TruncF32U:
return signature_F32_I64, nil
case wasm.OpcodeI64TruncF64S, wasm.OpcodeI64TruncF64U:
return signature_F64_I64, nil
case wasm.OpcodeF32ConvertI32S, wasm.OpcodeF32ConvertI32U:
return signature_I32_F32, nil
case wasm.OpcodeF32ConvertI64S, wasm.OpcodeF32ConvertI64U:
return signature_I64_F32, nil
case wasm.OpcodeF32DemoteF64:
return signature_F64_F32, nil
case wasm.OpcodeF64ConvertI32S, wasm.OpcodeF64ConvertI32U:
return signature_I32_F64, nil
case wasm.OpcodeF64ConvertI64S, wasm.OpcodeF64ConvertI64U:
return signature_I64_F64, nil
case wasm.OpcodeF64PromoteF32:
return signature_F32_F64, nil
case wasm.OpcodeI32ReinterpretF32:
return signature_F32_I32, nil
case wasm.OpcodeI64ReinterpretF64:
return signature_F64_I64, nil
case wasm.OpcodeF32ReinterpretI32:
return signature_I32_F32, nil
case wasm.OpcodeF64ReinterpretI64:
return signature_I64_F64, nil
case wasm.OpcodeI32Extend8S, wasm.OpcodeI32Extend16S:
return signature_I32_I32, nil
case wasm.OpcodeI64Extend8S, wasm.OpcodeI64Extend16S, wasm.OpcodeI64Extend32S:
return signature_I64_I64, nil
case wasm.OpcodeTableGet:
// table.get takes table's offset and pushes the ref type value of opaque pointer as i64 value onto the stack.
return signature_I32_I64, nil
case wasm.OpcodeTableSet:
// table.set takes table's offset and the ref type value of opaque pointer as i64 value.
return signature_I32I64_None, nil
case wasm.OpcodeRefFunc:
// ref.func is translated as pushing the compiled function's opaque pointer (uint64) at interpreterir layer.
return signature_None_I64, nil
case wasm.OpcodeRefIsNull:
// ref.is_null is translated as checking if the uint64 on the top of the stack (opaque pointer) is zero or not.
return signature_I64_I32, nil
case wasm.OpcodeRefNull:
// ref.null is translated as i64.const 0.
return signature_None_I64, nil
case wasm.OpcodeMiscPrefix:
switch miscOp := c.body[c.pc+1]; miscOp {
case wasm.OpcodeMiscI32TruncSatF32S, wasm.OpcodeMiscI32TruncSatF32U:
return signature_F32_I32, nil
case wasm.OpcodeMiscI32TruncSatF64S, wasm.OpcodeMiscI32TruncSatF64U:
return signature_F64_I32, nil
case wasm.OpcodeMiscI64TruncSatF32S, wasm.OpcodeMiscI64TruncSatF32U:
return signature_F32_I64, nil
case wasm.OpcodeMiscI64TruncSatF64S, wasm.OpcodeMiscI64TruncSatF64U:
return signature_F64_I64, nil
case wasm.OpcodeMiscMemoryInit, wasm.OpcodeMiscMemoryCopy, wasm.OpcodeMiscMemoryFill,
wasm.OpcodeMiscTableInit, wasm.OpcodeMiscTableCopy:
return signature_I32I32I32_None, nil
case wasm.OpcodeMiscDataDrop, wasm.OpcodeMiscElemDrop:
return signature_None_None, nil
case wasm.OpcodeMiscTableGrow:
return signature_I64I32_I32, nil
case wasm.OpcodeMiscTableSize:
return signature_None_I32, nil
case wasm.OpcodeMiscTableFill:
return signature_I32I64I32_None, nil
default:
return nil, fmt.Errorf("unsupported misc instruction in interpreterir: 0x%x", op)
}
case wasm.OpcodeVecPrefix:
switch vecOp := c.body[c.pc+1]; vecOp {
case wasm.OpcodeVecV128Const:
return signature_None_V128, nil
case wasm.OpcodeVecV128Load, wasm.OpcodeVecV128Load8x8s, wasm.OpcodeVecV128Load8x8u,
wasm.OpcodeVecV128Load16x4s, wasm.OpcodeVecV128Load16x4u, wasm.OpcodeVecV128Load32x2s,
wasm.OpcodeVecV128Load32x2u, wasm.OpcodeVecV128Load8Splat, wasm.OpcodeVecV128Load16Splat,
wasm.OpcodeVecV128Load32Splat, wasm.OpcodeVecV128Load64Splat, wasm.OpcodeVecV128Load32zero,
wasm.OpcodeVecV128Load64zero:
return signature_I32_V128, nil
case wasm.OpcodeVecV128Load8Lane, wasm.OpcodeVecV128Load16Lane,
wasm.OpcodeVecV128Load32Lane, wasm.OpcodeVecV128Load64Lane:
return signature_I32V128_V128, nil
case wasm.OpcodeVecV128Store,
wasm.OpcodeVecV128Store8Lane,
wasm.OpcodeVecV128Store16Lane,
wasm.OpcodeVecV128Store32Lane,
wasm.OpcodeVecV128Store64Lane:
return signature_I32V128_None, nil
case wasm.OpcodeVecI8x16ExtractLaneS,
wasm.OpcodeVecI8x16ExtractLaneU,
wasm.OpcodeVecI16x8ExtractLaneS,
wasm.OpcodeVecI16x8ExtractLaneU,
wasm.OpcodeVecI32x4ExtractLane:
return signature_V128_I32, nil
case wasm.OpcodeVecI64x2ExtractLane:
return signature_V128_I64, nil
case wasm.OpcodeVecF32x4ExtractLane:
return signature_V128_F32, nil
case wasm.OpcodeVecF64x2ExtractLane:
return signature_V128_F64, nil
case wasm.OpcodeVecI8x16ReplaceLane, wasm.OpcodeVecI16x8ReplaceLane, wasm.OpcodeVecI32x4ReplaceLane,
wasm.OpcodeVecI8x16Shl, wasm.OpcodeVecI8x16ShrS, wasm.OpcodeVecI8x16ShrU,
wasm.OpcodeVecI16x8Shl, wasm.OpcodeVecI16x8ShrS, wasm.OpcodeVecI16x8ShrU,
wasm.OpcodeVecI32x4Shl, wasm.OpcodeVecI32x4ShrS, wasm.OpcodeVecI32x4ShrU,
wasm.OpcodeVecI64x2Shl, wasm.OpcodeVecI64x2ShrS, wasm.OpcodeVecI64x2ShrU:
return signature_V128I32_V128, nil
case wasm.OpcodeVecI64x2ReplaceLane:
return signature_V128I64_V128, nil
case wasm.OpcodeVecF32x4ReplaceLane:
return signature_V128F32_V128, nil
case wasm.OpcodeVecF64x2ReplaceLane:
return signature_V128F64_V128, nil
case wasm.OpcodeVecI8x16Splat,
wasm.OpcodeVecI16x8Splat,
wasm.OpcodeVecI32x4Splat:
return signature_I32_V128, nil
case wasm.OpcodeVecI64x2Splat:
return signature_I64_V128, nil
case wasm.OpcodeVecF32x4Splat:
return signature_F32_V128, nil
case wasm.OpcodeVecF64x2Splat:
return signature_F64_V128, nil
case wasm.OpcodeVecV128i8x16Shuffle, wasm.OpcodeVecI8x16Swizzle, wasm.OpcodeVecV128And, wasm.OpcodeVecV128Or, wasm.OpcodeVecV128Xor, wasm.OpcodeVecV128AndNot:
return signature_V128V128_V128, nil
case wasm.OpcodeVecI8x16AllTrue, wasm.OpcodeVecI16x8AllTrue, wasm.OpcodeVecI32x4AllTrue, wasm.OpcodeVecI64x2AllTrue,
wasm.OpcodeVecV128AnyTrue,
wasm.OpcodeVecI8x16BitMask, wasm.OpcodeVecI16x8BitMask, wasm.OpcodeVecI32x4BitMask, wasm.OpcodeVecI64x2BitMask:
return signature_V128_I32, nil
case wasm.OpcodeVecV128Not, wasm.OpcodeVecI8x16Neg, wasm.OpcodeVecI16x8Neg, wasm.OpcodeVecI32x4Neg, wasm.OpcodeVecI64x2Neg,
wasm.OpcodeVecF32x4Neg, wasm.OpcodeVecF64x2Neg, wasm.OpcodeVecF32x4Sqrt, wasm.OpcodeVecF64x2Sqrt,
wasm.OpcodeVecI8x16Abs, wasm.OpcodeVecI8x16Popcnt, wasm.OpcodeVecI16x8Abs, wasm.OpcodeVecI32x4Abs, wasm.OpcodeVecI64x2Abs,
wasm.OpcodeVecF32x4Abs, wasm.OpcodeVecF64x2Abs,
wasm.OpcodeVecF32x4Ceil, wasm.OpcodeVecF32x4Floor, wasm.OpcodeVecF32x4Trunc, wasm.OpcodeVecF32x4Nearest,
wasm.OpcodeVecF64x2Ceil, wasm.OpcodeVecF64x2Floor, wasm.OpcodeVecF64x2Trunc, wasm.OpcodeVecF64x2Nearest,
wasm.OpcodeVecI16x8ExtendLowI8x16S, wasm.OpcodeVecI16x8ExtendHighI8x16S, wasm.OpcodeVecI16x8ExtendLowI8x16U, wasm.OpcodeVecI16x8ExtendHighI8x16U,
wasm.OpcodeVecI32x4ExtendLowI16x8S, wasm.OpcodeVecI32x4ExtendHighI16x8S, wasm.OpcodeVecI32x4ExtendLowI16x8U, wasm.OpcodeVecI32x4ExtendHighI16x8U,
wasm.OpcodeVecI64x2ExtendLowI32x4S, wasm.OpcodeVecI64x2ExtendHighI32x4S, wasm.OpcodeVecI64x2ExtendLowI32x4U, wasm.OpcodeVecI64x2ExtendHighI32x4U,
wasm.OpcodeVecI16x8ExtaddPairwiseI8x16S, wasm.OpcodeVecI16x8ExtaddPairwiseI8x16U, wasm.OpcodeVecI32x4ExtaddPairwiseI16x8S, wasm.OpcodeVecI32x4ExtaddPairwiseI16x8U,
wasm.OpcodeVecF64x2PromoteLowF32x4Zero, wasm.OpcodeVecF32x4DemoteF64x2Zero,
wasm.OpcodeVecF32x4ConvertI32x4S, wasm.OpcodeVecF32x4ConvertI32x4U,
wasm.OpcodeVecF64x2ConvertLowI32x4S, wasm.OpcodeVecF64x2ConvertLowI32x4U,
wasm.OpcodeVecI32x4TruncSatF32x4S, wasm.OpcodeVecI32x4TruncSatF32x4U,
wasm.OpcodeVecI32x4TruncSatF64x2SZero, wasm.OpcodeVecI32x4TruncSatF64x2UZero:
return signature_V128_V128, nil
case wasm.OpcodeVecV128Bitselect:
return signature_V128V128V128_V32, nil
case wasm.OpcodeVecI8x16Eq, wasm.OpcodeVecI8x16Ne, wasm.OpcodeVecI8x16LtS, wasm.OpcodeVecI8x16LtU, wasm.OpcodeVecI8x16GtS,
wasm.OpcodeVecI8x16GtU, wasm.OpcodeVecI8x16LeS, wasm.OpcodeVecI8x16LeU, wasm.OpcodeVecI8x16GeS, wasm.OpcodeVecI8x16GeU,
wasm.OpcodeVecI16x8Eq, wasm.OpcodeVecI16x8Ne, wasm.OpcodeVecI16x8LtS, wasm.OpcodeVecI16x8LtU, wasm.OpcodeVecI16x8GtS,
wasm.OpcodeVecI16x8GtU, wasm.OpcodeVecI16x8LeS, wasm.OpcodeVecI16x8LeU, wasm.OpcodeVecI16x8GeS, wasm.OpcodeVecI16x8GeU,
wasm.OpcodeVecI32x4Eq, wasm.OpcodeVecI32x4Ne, wasm.OpcodeVecI32x4LtS, wasm.OpcodeVecI32x4LtU, wasm.OpcodeVecI32x4GtS,
wasm.OpcodeVecI32x4GtU, wasm.OpcodeVecI32x4LeS, wasm.OpcodeVecI32x4LeU, wasm.OpcodeVecI32x4GeS, wasm.OpcodeVecI32x4GeU,
wasm.OpcodeVecI64x2Eq, wasm.OpcodeVecI64x2Ne, wasm.OpcodeVecI64x2LtS, wasm.OpcodeVecI64x2GtS, wasm.OpcodeVecI64x2LeS,
wasm.OpcodeVecI64x2GeS, wasm.OpcodeVecF32x4Eq, wasm.OpcodeVecF32x4Ne, wasm.OpcodeVecF32x4Lt, wasm.OpcodeVecF32x4Gt,
wasm.OpcodeVecF32x4Le, wasm.OpcodeVecF32x4Ge, wasm.OpcodeVecF64x2Eq, wasm.OpcodeVecF64x2Ne, wasm.OpcodeVecF64x2Lt,
wasm.OpcodeVecF64x2Gt, wasm.OpcodeVecF64x2Le, wasm.OpcodeVecF64x2Ge,
wasm.OpcodeVecI8x16Add, wasm.OpcodeVecI8x16AddSatS, wasm.OpcodeVecI8x16AddSatU, wasm.OpcodeVecI8x16Sub,
wasm.OpcodeVecI8x16SubSatS, wasm.OpcodeVecI8x16SubSatU,
wasm.OpcodeVecI16x8Add, wasm.OpcodeVecI16x8AddSatS, wasm.OpcodeVecI16x8AddSatU, wasm.OpcodeVecI16x8Sub,
wasm.OpcodeVecI16x8SubSatS, wasm.OpcodeVecI16x8SubSatU, wasm.OpcodeVecI16x8Mul,
wasm.OpcodeVecI32x4Add, wasm.OpcodeVecI32x4Sub, wasm.OpcodeVecI32x4Mul,
wasm.OpcodeVecI64x2Add, wasm.OpcodeVecI64x2Sub, wasm.OpcodeVecI64x2Mul,
wasm.OpcodeVecF32x4Add, wasm.OpcodeVecF32x4Sub, wasm.OpcodeVecF32x4Mul, wasm.OpcodeVecF32x4Div,
wasm.OpcodeVecF64x2Add, wasm.OpcodeVecF64x2Sub, wasm.OpcodeVecF64x2Mul, wasm.OpcodeVecF64x2Div,
wasm.OpcodeVecI8x16MinS, wasm.OpcodeVecI8x16MinU, wasm.OpcodeVecI8x16MaxS, wasm.OpcodeVecI8x16MaxU, wasm.OpcodeVecI8x16AvgrU,
wasm.OpcodeVecI16x8MinS, wasm.OpcodeVecI16x8MinU, wasm.OpcodeVecI16x8MaxS, wasm.OpcodeVecI16x8MaxU, wasm.OpcodeVecI16x8AvgrU,
wasm.OpcodeVecI32x4MinS, wasm.OpcodeVecI32x4MinU, wasm.OpcodeVecI32x4MaxS, wasm.OpcodeVecI32x4MaxU,
wasm.OpcodeVecF32x4Min, wasm.OpcodeVecF32x4Max, wasm.OpcodeVecF64x2Min, wasm.OpcodeVecF64x2Max,
wasm.OpcodeVecF32x4Pmin, wasm.OpcodeVecF32x4Pmax, wasm.OpcodeVecF64x2Pmin, wasm.OpcodeVecF64x2Pmax,
wasm.OpcodeVecI16x8Q15mulrSatS,
wasm.OpcodeVecI16x8ExtMulLowI8x16S, wasm.OpcodeVecI16x8ExtMulHighI8x16S, wasm.OpcodeVecI16x8ExtMulLowI8x16U, wasm.OpcodeVecI16x8ExtMulHighI8x16U,
wasm.OpcodeVecI32x4ExtMulLowI16x8S, wasm.OpcodeVecI32x4ExtMulHighI16x8S, wasm.OpcodeVecI32x4ExtMulLowI16x8U, wasm.OpcodeVecI32x4ExtMulHighI16x8U,
wasm.OpcodeVecI64x2ExtMulLowI32x4S, wasm.OpcodeVecI64x2ExtMulHighI32x4S, wasm.OpcodeVecI64x2ExtMulLowI32x4U, wasm.OpcodeVecI64x2ExtMulHighI32x4U,
wasm.OpcodeVecI32x4DotI16x8S,
wasm.OpcodeVecI8x16NarrowI16x8S, wasm.OpcodeVecI8x16NarrowI16x8U, wasm.OpcodeVecI16x8NarrowI32x4S, wasm.OpcodeVecI16x8NarrowI32x4U:
return signature_V128V128_V128, nil
default:
return nil, fmt.Errorf("unsupported vector instruction in interpreterir: %s", wasm.VectorInstructionName(vecOp))
}
case wasm.OpcodeAtomicPrefix:
switch atomicOp := c.body[c.pc+1]; atomicOp {
case wasm.OpcodeAtomicMemoryNotify:
return signature_I32I32_I32, nil
case wasm.OpcodeAtomicMemoryWait32:
return signature_I32I32I64_I32, nil
case wasm.OpcodeAtomicMemoryWait64:
return signature_I32I64I64_I32, nil
case wasm.OpcodeAtomicFence:
return signature_None_None, nil
case wasm.OpcodeAtomicI32Load, wasm.OpcodeAtomicI32Load8U, wasm.OpcodeAtomicI32Load16U:
return signature_I32_I32, nil
case wasm.OpcodeAtomicI64Load, wasm.OpcodeAtomicI64Load8U, wasm.OpcodeAtomicI64Load16U, wasm.OpcodeAtomicI64Load32U:
return signature_I32_I64, nil
case wasm.OpcodeAtomicI32Store, wasm.OpcodeAtomicI32Store8, wasm.OpcodeAtomicI32Store16:
return signature_I32I32_None, nil
case wasm.OpcodeAtomicI64Store, wasm.OpcodeAtomicI64Store8, wasm.OpcodeAtomicI64Store16, wasm.OpcodeAtomicI64Store32:
return signature_I32I64_None, nil
case wasm.OpcodeAtomicI32RmwAdd, wasm.OpcodeAtomicI32RmwSub, wasm.OpcodeAtomicI32RmwAnd, wasm.OpcodeAtomicI32RmwOr, wasm.OpcodeAtomicI32RmwXor, wasm.OpcodeAtomicI32RmwXchg,
wasm.OpcodeAtomicI32Rmw8AddU, wasm.OpcodeAtomicI32Rmw8SubU, wasm.OpcodeAtomicI32Rmw8AndU, wasm.OpcodeAtomicI32Rmw8OrU, wasm.OpcodeAtomicI32Rmw8XorU, wasm.OpcodeAtomicI32Rmw8XchgU,
wasm.OpcodeAtomicI32Rmw16AddU, wasm.OpcodeAtomicI32Rmw16SubU, wasm.OpcodeAtomicI32Rmw16AndU, wasm.OpcodeAtomicI32Rmw16OrU, wasm.OpcodeAtomicI32Rmw16XorU, wasm.OpcodeAtomicI32Rmw16XchgU:
return signature_I32I32_I32, nil
case wasm.OpcodeAtomicI64RmwAdd, wasm.OpcodeAtomicI64RmwSub, wasm.OpcodeAtomicI64RmwAnd, wasm.OpcodeAtomicI64RmwOr, wasm.OpcodeAtomicI64RmwXor, wasm.OpcodeAtomicI64RmwXchg,
wasm.OpcodeAtomicI64Rmw8AddU, wasm.OpcodeAtomicI64Rmw8SubU, wasm.OpcodeAtomicI64Rmw8AndU, wasm.OpcodeAtomicI64Rmw8OrU, wasm.OpcodeAtomicI64Rmw8XorU, wasm.OpcodeAtomicI64Rmw8XchgU,
wasm.OpcodeAtomicI64Rmw16AddU, wasm.OpcodeAtomicI64Rmw16SubU, wasm.OpcodeAtomicI64Rmw16AndU, wasm.OpcodeAtomicI64Rmw16OrU, wasm.OpcodeAtomicI64Rmw16XorU, wasm.OpcodeAtomicI64Rmw16XchgU,
wasm.OpcodeAtomicI64Rmw32AddU, wasm.OpcodeAtomicI64Rmw32SubU, wasm.OpcodeAtomicI64Rmw32AndU, wasm.OpcodeAtomicI64Rmw32OrU, wasm.OpcodeAtomicI64Rmw32XorU, wasm.OpcodeAtomicI64Rmw32XchgU:
return signature_I32I64_I64, nil
case wasm.OpcodeAtomicI32RmwCmpxchg, wasm.OpcodeAtomicI32Rmw8CmpxchgU, wasm.OpcodeAtomicI32Rmw16CmpxchgU:
return signature_I32I32I32_I32, nil
case wasm.OpcodeAtomicI64RmwCmpxchg, wasm.OpcodeAtomicI64Rmw8CmpxchgU, wasm.OpcodeAtomicI64Rmw16CmpxchgU, wasm.OpcodeAtomicI64Rmw32CmpxchgU:
return signature_I32I64I64_I64, nil
default:
return nil, fmt.Errorf("unsupported atomic instruction in interpreterir: %s", wasm.AtomicInstructionName(atomicOp))
}
default:
return nil, fmt.Errorf("unsupported instruction in interpreterir: 0x%x", op)
}
}
// funcTypeToIRSignatures is the central cache for a module to get the *signature
// for function calls.
type funcTypeToIRSignatures struct {
directCalls []*signature
indirectCalls []*signature
wasmTypes []wasm.FunctionType
}
// get returns the *signature for the direct or indirect function call against functions whose type is at `typeIndex`.
func (f *funcTypeToIRSignatures) get(typeIndex wasm.Index, indirect bool) *signature {
var sig *signature
if indirect {
sig = f.indirectCalls[typeIndex]
} else {
sig = f.directCalls[typeIndex]
}
if sig != nil {
return sig
}
tp := &f.wasmTypes[typeIndex]
if indirect {
sig = &signature{
in: make([]unsignedType, 0, len(tp.Params)+1), // +1 to reserve space for call indirect index.
out: make([]unsignedType, 0, len(tp.Results)),
}
} else {
sig = &signature{
in: make([]unsignedType, 0, len(tp.Params)),
out: make([]unsignedType, 0, len(tp.Results)),
}
}
for _, vt := range tp.Params {
sig.in = append(sig.in, wasmValueTypeTounsignedType(vt))
}
for _, vt := range tp.Results {
sig.out = append(sig.out, wasmValueTypeTounsignedType(vt))
}
if indirect {
sig.in = append(sig.in, unsignedTypeI32)
f.indirectCalls[typeIndex] = sig
} else {
f.directCalls[typeIndex] = sig
}
return sig
}
func wasmValueTypeTounsignedType(vt wasm.ValueType) unsignedType {
switch vt {
case wasm.ValueTypeI32:
return unsignedTypeI32
case wasm.ValueTypeI64,
// From interpreterir layer, ref type values are opaque 64-bit pointers.
wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
return unsignedTypeI64
case wasm.ValueTypeF32:
return unsignedTypeF32
case wasm.ValueTypeF64:
return unsignedTypeF64
case wasm.ValueTypeV128:
return unsignedTypeV128
}
panic("unreachable")
}
func wasmValueTypeToUnsignedOutSignature(vt wasm.ValueType) *signature {
switch vt {
case wasm.ValueTypeI32:
return signature_None_I32
case wasm.ValueTypeI64,
// From interpreterir layer, ref type values are opaque 64-bit pointers.
wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
return signature_None_I64
case wasm.ValueTypeF32:
return signature_None_F32
case wasm.ValueTypeF64:
return signature_None_F64
case wasm.ValueTypeV128:
return signature_None_V128
}
panic("unreachable")
}
func wasmValueTypeToUnsignedInSignature(vt wasm.ValueType) *signature {
switch vt {
case wasm.ValueTypeI32:
return signature_I32_None
case wasm.ValueTypeI64,
// From interpreterir layer, ref type values are opaque 64-bit pointers.
wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
return signature_I64_None
case wasm.ValueTypeF32:
return signature_F32_None
case wasm.ValueTypeF64:
return signature_F64_None
case wasm.ValueTypeV128:
return signature_V128_None
}
panic("unreachable")
}
func wasmValueTypeToUnsignedInOutSignature(vt wasm.ValueType) *signature {
switch vt {
case wasm.ValueTypeI32:
return signature_I32_I32
case wasm.ValueTypeI64,
// At interpreterir layer, ref type values are opaque 64-bit pointers.
wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
return signature_I64_I64
case wasm.ValueTypeF32:
return signature_F32_F32
case wasm.ValueTypeF64:
return signature_F64_F64
case wasm.ValueTypeV128:
return signature_V128_V128
}
panic("unreachable")
}
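Each signature above only records how an opcode pops and pushes the untyped value stack; as the comment on wasmOpcodeSignature notes, it is used for stack-shape validation while lowering Wasm to the interpreter IR. A hypothetical in-package helper (applySignature is not part of this diff) sketches how such a signature would be applied to a type stack:
// applySignature pops sig.in from the stack and pushes sig.out, mirroring how a
// signature describes an opcode's effect on the value stack. Exact type checking
// (including unsignedTypeUnknown wildcards) is omitted for brevity.
func applySignature(stack []unsignedType, sig *signature) ([]unsignedType, error) {
	if len(stack) < len(sig.in) {
		return nil, fmt.Errorf("stack underflow: need %d values, have %d", len(sig.in), len(stack))
	}
	stack = stack[:len(stack)-len(sig.in)]
	return append(stack, sig.out...), nil
}
For example, applying signature_I32I32_I32 (the signature of i32.add) to a stack holding two i32 values leaves a single i32 behind.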


@@ -0,0 +1,170 @@
package backend
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
type (
// FunctionABI represents the ABI information for a function which corresponds to a ssa.Signature.
FunctionABI struct {
Initialized bool
Args, Rets []ABIArg
ArgStackSize, RetStackSize int64
ArgIntRealRegs byte
ArgFloatRealRegs byte
RetIntRealRegs byte
RetFloatRealRegs byte
}
// ABIArg represents either argument or return value's location.
ABIArg struct {
// Index is the index of the argument.
Index int
// Kind is the kind of the argument.
Kind ABIArgKind
// Reg is valid if Kind == ABIArgKindReg.
// This VReg must be based on RealReg.
Reg regalloc.VReg
// Offset is valid if Kind == ABIArgKindStack.
// This is the offset from the beginning of either arg or ret stack slot.
Offset int64
// Type is the type of the argument.
Type ssa.Type
}
// ABIArgKind is the kind of ABI argument.
ABIArgKind byte
)
const (
// ABIArgKindReg represents an argument passed in a register.
ABIArgKindReg = iota
// ABIArgKindStack represents an argument passed in the stack.
ABIArgKindStack
)
// String implements fmt.Stringer.
func (a *ABIArg) String() string {
return fmt.Sprintf("args[%d]: %s", a.Index, a.Kind)
}
// String implements fmt.Stringer.
func (a ABIArgKind) String() string {
switch a {
case ABIArgKindReg:
return "reg"
case ABIArgKindStack:
return "stack"
default:
panic("BUG")
}
}
// Init initializes the abiImpl for the given signature.
func (a *FunctionABI) Init(sig *ssa.Signature, argResultInts, argResultFloats []regalloc.RealReg) {
if len(a.Rets) < len(sig.Results) {
a.Rets = make([]ABIArg, len(sig.Results))
}
a.Rets = a.Rets[:len(sig.Results)]
a.RetStackSize = a.setABIArgs(a.Rets, sig.Results, argResultInts, argResultFloats)
if argsNum := len(sig.Params); len(a.Args) < argsNum {
a.Args = make([]ABIArg, argsNum)
}
a.Args = a.Args[:len(sig.Params)]
a.ArgStackSize = a.setABIArgs(a.Args, sig.Params, argResultInts, argResultFloats)
// Gather the real registers usages in arg/return.
a.ArgIntRealRegs, a.ArgFloatRealRegs = 0, 0
a.RetIntRealRegs, a.RetFloatRealRegs = 0, 0
for i := range a.Rets {
r := &a.Rets[i]
if r.Kind == ABIArgKindReg {
if r.Type.IsInt() {
a.RetIntRealRegs++
} else {
a.RetFloatRealRegs++
}
}
}
for i := range a.Args {
arg := &a.Args[i]
if arg.Kind == ABIArgKindReg {
if arg.Type.IsInt() {
a.ArgIntRealRegs++
} else {
a.ArgFloatRealRegs++
}
}
}
a.Initialized = true
}
// setABIArgs sets the ABI arguments in the given slice. This assumes that len(s) >= len(types)
// where if len(s) > len(types), the last elements of s is for the multi-return slot.
func (a *FunctionABI) setABIArgs(s []ABIArg, types []ssa.Type, ints, floats []regalloc.RealReg) (stackSize int64) {
il, fl := len(ints), len(floats)
var stackOffset int64
intParamIndex, floatParamIndex := 0, 0
for i, typ := range types {
arg := &s[i]
arg.Index = i
arg.Type = typ
if typ.IsInt() {
if intParamIndex >= il {
arg.Kind = ABIArgKindStack
const slotSize = 8 // Align 8 bytes.
arg.Offset = stackOffset
stackOffset += slotSize
} else {
arg.Kind = ABIArgKindReg
arg.Reg = regalloc.FromRealReg(ints[intParamIndex], regalloc.RegTypeInt)
intParamIndex++
}
} else {
if floatParamIndex >= fl {
arg.Kind = ABIArgKindStack
slotSize := int64(8) // Align at least 8 bytes.
if typ.Bits() == 128 { // Vector.
slotSize = 16
}
arg.Offset = stackOffset
stackOffset += slotSize
} else {
arg.Kind = ABIArgKindReg
arg.Reg = regalloc.FromRealReg(floats[floatParamIndex], regalloc.RegTypeFloat)
floatParamIndex++
}
}
}
return stackOffset
}
func (a *FunctionABI) AlignedArgResultStackSlotSize() uint32 {
stackSlotSize := a.RetStackSize + a.ArgStackSize
// Align stackSlotSize to 16 bytes.
stackSlotSize = (stackSlotSize + 15) &^ 15
// Check overflow 32-bit.
if stackSlotSize > 0xFFFFFFFF {
panic("ABI stack slot size overflow")
}
return uint32(stackSlotSize)
}
func (a *FunctionABI) ABIInfoAsUint64() uint64 {
return uint64(a.ArgIntRealRegs)<<56 |
uint64(a.ArgFloatRealRegs)<<48 |
uint64(a.RetIntRealRegs)<<40 |
uint64(a.RetFloatRealRegs)<<32 |
uint64(a.AlignedArgResultStackSlotSize())
}
func ABIInfoFromUint64(info uint64) (argIntRealRegs, argFloatRealRegs, retIntRealRegs, retFloatRealRegs byte, stackSlotSize uint32) {
return byte(info >> 56), byte(info >> 48), byte(info >> 40), byte(info >> 32), uint32(info)
}
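ABIInfoAsUint64 stores the four register counts in the top four bytes and the 16-byte-aligned arg/result stack-slot size in the low 32 bits, so the whole ABI summary fits in one word. A small round-trip sketch, written as if it lived in a test file of package backend with a plain fmt import (the example name and the concrete field values are made up for illustration):
func ExampleFunctionABI_ABIInfoAsUint64() {
	abi := &FunctionABI{
		ArgIntRealRegs: 2, ArgFloatRealRegs: 1,
		RetIntRealRegs: 1, RetFloatRealRegs: 0,
		ArgStackSize: 8, RetStackSize: 16,
	}
	info := abi.ABIInfoAsUint64()
	argInts, argFloats, retInts, retFloats, slotSize := ABIInfoFromUint64(info)
	// 8+16 bytes of stack args/results round up to a 32-byte, 16-byte-aligned slot.
	fmt.Println(argInts, argFloats, retInts, retFloats, slotSize)
	// Output: 2 1 1 0 32
}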


@@ -0,0 +1,3 @@
// Package backend must be free of Wasm-specific concepts. In other words,
// this package must not import the internal/wasm package.
package backend


@@ -0,0 +1,417 @@
package backend
import (
"context"
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
// NewCompiler returns a new Compiler that can generate a machine code.
func NewCompiler(ctx context.Context, mach Machine, builder ssa.Builder) Compiler {
return newCompiler(ctx, mach, builder)
}
func newCompiler(_ context.Context, mach Machine, builder ssa.Builder) *compiler {
argResultInts, argResultFloats := mach.ArgsResultsRegs()
c := &compiler{
mach: mach, ssaBuilder: builder,
nextVRegID: regalloc.VRegIDNonReservedBegin,
argResultInts: argResultInts,
argResultFloats: argResultFloats,
}
mach.SetCompiler(c)
return c
}
// Compiler is the backend of wazevo which takes ssa.Builder and Machine,
// and uses the information there to emit the final machine code.
type Compiler interface {
// SSABuilder returns the ssa.Builder used by this compiler.
SSABuilder() ssa.Builder
// Compile executes the following steps:
// 1. Lower()
// 2. RegAlloc()
// 3. Finalize()
// 4. Encode()
//
// Each step can be called individually for testing purpose, therefore they are exposed in this interface too.
//
// The returned byte slices are the machine code and the relocation information for the machine code.
// The caller is responsible for copying them immediately since the compiler may reuse the buffer.
Compile(ctx context.Context) (_ []byte, _ []RelocationInfo, _ error)
// Lower lowers the given ssa.Instruction to the machine-specific instructions.
Lower()
// RegAlloc performs the register allocation after Lower is called.
RegAlloc()
// Finalize performs the finalization of the compilation, including machine code emission.
// This must be called after RegAlloc.
Finalize(ctx context.Context) error
// Buf returns the buffer of the encoded machine code. This is only used for testing purpose.
Buf() []byte
BufPtr() *[]byte
// Format returns the debug string of the current state of the compiler.
Format() string
// Init initializes the internal state of the compiler for the next compilation.
Init()
// AllocateVReg allocates a new virtual register of the given type.
AllocateVReg(typ ssa.Type) regalloc.VReg
// ValueDefinition returns the definition of the given value.
ValueDefinition(ssa.Value) *SSAValueDefinition
// VRegOf returns the virtual register of the given ssa.Value.
VRegOf(value ssa.Value) regalloc.VReg
// TypeOf returns the ssa.Type of the given virtual register.
TypeOf(regalloc.VReg) ssa.Type
// MatchInstr returns true if the given definition is from an instruction with the given opcode, the current group ID,
// and a refcount of 1. That means, the instruction can be merged/swapped within the current instruction group.
MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool
// MatchInstrOneOf is the same as MatchInstr but for multiple opcodes. If it matches one of ssa.Opcode,
// this returns the opcode. Otherwise, this returns ssa.OpcodeInvalid.
//
// Note: caller should be careful to avoid excessive allocation on opcodes slice.
MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode
// AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.
AddRelocationInfo(funcRef ssa.FuncRef)
// AddSourceOffsetInfo appends the source offset information for the given offset.
AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset)
// SourceOffsetInfo returns the source offset information for the current buffer offset.
SourceOffsetInfo() []SourceOffsetInfo
// EmitByte appends a byte to the buffer. Used during the code emission.
EmitByte(b byte)
// Emit4Bytes appends 4 bytes to the buffer. Used during the code emission.
Emit4Bytes(b uint32)
// Emit8Bytes appends 8 bytes to the buffer. Used during the code emission.
Emit8Bytes(b uint64)
// GetFunctionABI returns the ABI information for the given signature.
GetFunctionABI(sig *ssa.Signature) *FunctionABI
}
// RelocationInfo represents the relocation information for a call instruction.
type RelocationInfo struct {
// Offset represents the offset from the beginning of the machine code of either a function or the entire module.
Offset int64
// Target is the target function of the call instruction.
FuncRef ssa.FuncRef
}
// compiler implements Compiler.
type compiler struct {
mach Machine
currentGID ssa.InstructionGroupID
ssaBuilder ssa.Builder
// nextVRegID is the next virtual register ID to be allocated.
nextVRegID regalloc.VRegID
// ssaValueToVRegs maps ssa.ValueID to regalloc.VReg.
ssaValueToVRegs [] /* VRegID to */ regalloc.VReg
// ssaValueDefinitions maps ssa.ValueID to its definition.
ssaValueDefinitions []SSAValueDefinition
// ssaValueRefCounts is a cached list obtained by ssa.Builder.ValueRefCounts().
ssaValueRefCounts []int
// returnVRegs is the list of virtual registers that store the return values.
returnVRegs []regalloc.VReg
varEdges [][2]regalloc.VReg
varEdgeTypes []ssa.Type
constEdges []struct {
cInst *ssa.Instruction
dst regalloc.VReg
}
vRegSet []bool
vRegIDs []regalloc.VRegID
tempRegs []regalloc.VReg
tmpVals []ssa.Value
ssaTypeOfVRegID [] /* VRegID to */ ssa.Type
buf []byte
relocations []RelocationInfo
sourceOffsets []SourceOffsetInfo
// abis maps ssa.SignatureID to the ABI implementation.
abis []FunctionABI
argResultInts, argResultFloats []regalloc.RealReg
}
// SourceOffsetInfo is a data to associate the source offset with the executable offset.
type SourceOffsetInfo struct {
// SourceOffset is the source offset in the original source code.
SourceOffset ssa.SourceOffset
// ExecutableOffset is the offset in the compiled executable.
ExecutableOffset int64
}
// Compile implements Compiler.Compile.
func (c *compiler) Compile(ctx context.Context) ([]byte, []RelocationInfo, error) {
c.Lower()
if wazevoapi.PrintSSAToBackendIRLowering && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[after lowering for %s ]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After lowering to ISA specific IR", c.Format())
}
c.RegAlloc()
if wazevoapi.PrintRegisterAllocated && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[after regalloc for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Register Allocation", c.Format())
}
if err := c.Finalize(ctx); err != nil {
return nil, nil, err
}
if wazevoapi.PrintFinalizedMachineCode && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[after finalize for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Finalization", c.Format())
}
return c.buf, c.relocations, nil
}
// RegAlloc implements Compiler.RegAlloc.
func (c *compiler) RegAlloc() {
c.mach.RegAlloc()
}
// Finalize implements Compiler.Finalize.
func (c *compiler) Finalize(ctx context.Context) error {
c.mach.PostRegAlloc()
return c.mach.Encode(ctx)
}
// setCurrentGroupID sets the current instruction group ID.
func (c *compiler) setCurrentGroupID(gid ssa.InstructionGroupID) {
c.currentGID = gid
}
// assignVirtualRegisters assigns a virtual register to each ssa.ValueID Valid in the ssa.Builder.
func (c *compiler) assignVirtualRegisters() {
builder := c.ssaBuilder
refCounts := builder.ValueRefCounts()
c.ssaValueRefCounts = refCounts
need := len(refCounts)
if need >= len(c.ssaValueToVRegs) {
c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, need+1)...)
}
if need >= len(c.ssaValueDefinitions) {
c.ssaValueDefinitions = append(c.ssaValueDefinitions, make([]SSAValueDefinition, need+1)...)
}
for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() {
// First we assign a virtual register to each parameter.
for i := 0; i < blk.Params(); i++ {
p := blk.Param(i)
pid := p.ID()
typ := p.Type()
vreg := c.AllocateVReg(typ)
c.ssaValueToVRegs[pid] = vreg
c.ssaValueDefinitions[pid] = SSAValueDefinition{BlockParamValue: p, BlkParamVReg: vreg}
c.ssaTypeOfVRegID[vreg.ID()] = p.Type()
}
// Assigns each value to a virtual register produced by instructions.
for cur := blk.Root(); cur != nil; cur = cur.Next() {
r, rs := cur.Returns()
var N int
if r.Valid() {
id := r.ID()
ssaTyp := r.Type()
typ := r.Type()
vReg := c.AllocateVReg(typ)
c.ssaValueToVRegs[id] = vReg
c.ssaValueDefinitions[id] = SSAValueDefinition{
Instr: cur,
N: 0,
RefCount: refCounts[id],
}
c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp
N++
}
for _, r := range rs {
id := r.ID()
ssaTyp := r.Type()
vReg := c.AllocateVReg(ssaTyp)
c.ssaValueToVRegs[id] = vReg
c.ssaValueDefinitions[id] = SSAValueDefinition{
Instr: cur,
N: N,
RefCount: refCounts[id],
}
c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp
N++
}
}
}
for i, retBlk := 0, builder.ReturnBlock(); i < retBlk.Params(); i++ {
typ := retBlk.Param(i).Type()
vReg := c.AllocateVReg(typ)
c.returnVRegs = append(c.returnVRegs, vReg)
c.ssaTypeOfVRegID[vReg.ID()] = typ
}
}
// AllocateVReg implements Compiler.AllocateVReg.
func (c *compiler) AllocateVReg(typ ssa.Type) regalloc.VReg {
regType := regalloc.RegTypeOf(typ)
r := regalloc.VReg(c.nextVRegID).SetRegType(regType)
id := r.ID()
if int(id) >= len(c.ssaTypeOfVRegID) {
c.ssaTypeOfVRegID = append(c.ssaTypeOfVRegID, make([]ssa.Type, id+1)...)
}
c.ssaTypeOfVRegID[id] = typ
c.nextVRegID++
return r
}
// Init implements Compiler.Init.
func (c *compiler) Init() {
c.currentGID = 0
c.nextVRegID = regalloc.VRegIDNonReservedBegin
c.returnVRegs = c.returnVRegs[:0]
c.mach.Reset()
c.varEdges = c.varEdges[:0]
c.constEdges = c.constEdges[:0]
c.buf = c.buf[:0]
c.sourceOffsets = c.sourceOffsets[:0]
c.relocations = c.relocations[:0]
}
// ValueDefinition implements Compiler.ValueDefinition.
func (c *compiler) ValueDefinition(value ssa.Value) *SSAValueDefinition {
return &c.ssaValueDefinitions[value.ID()]
}
// VRegOf implements Compiler.VRegOf.
func (c *compiler) VRegOf(value ssa.Value) regalloc.VReg {
return c.ssaValueToVRegs[value.ID()]
}
// Format implements Compiler.Format.
func (c *compiler) Format() string {
return c.mach.Format()
}
// TypeOf implements Compiler.TypeOf.
func (c *compiler) TypeOf(v regalloc.VReg) ssa.Type {
return c.ssaTypeOfVRegID[v.ID()]
}
// MatchInstr implements Compiler.MatchInstr.
func (c *compiler) MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool {
instr := def.Instr
return def.IsFromInstr() &&
instr.Opcode() == opcode &&
instr.GroupID() == c.currentGID &&
def.RefCount < 2
}
// MatchInstrOneOf implements Compiler.MatchInstrOneOf.
func (c *compiler) MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode {
instr := def.Instr
if !def.IsFromInstr() {
return ssa.OpcodeInvalid
}
if instr.GroupID() != c.currentGID {
return ssa.OpcodeInvalid
}
if def.RefCount >= 2 {
return ssa.OpcodeInvalid
}
opcode := instr.Opcode()
for _, op := range opcodes {
if opcode == op {
return opcode
}
}
return ssa.OpcodeInvalid
}
// SSABuilder implements Compiler.SSABuilder.
func (c *compiler) SSABuilder() ssa.Builder {
return c.ssaBuilder
}
// AddSourceOffsetInfo implements Compiler.AddSourceOffsetInfo.
func (c *compiler) AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) {
c.sourceOffsets = append(c.sourceOffsets, SourceOffsetInfo{
SourceOffset: sourceOffset,
ExecutableOffset: executableOffset,
})
}
// SourceOffsetInfo implements Compiler.SourceOffsetInfo.
func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo {
return c.sourceOffsets
}
// AddRelocationInfo implements Compiler.AddRelocationInfo.
func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) {
c.relocations = append(c.relocations, RelocationInfo{
Offset: int64(len(c.buf)),
FuncRef: funcRef,
})
}
// Emit8Bytes implements Compiler.Emit8Bytes.
func (c *compiler) Emit8Bytes(b uint64) {
c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24), byte(b>>32), byte(b>>40), byte(b>>48), byte(b>>56))
}
// Emit4Bytes implements Compiler.Emit4Bytes.
func (c *compiler) Emit4Bytes(b uint32) {
c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24))
}
// EmitByte implements Compiler.EmitByte.
func (c *compiler) EmitByte(b byte) {
c.buf = append(c.buf, b)
}
// Buf implements Compiler.Buf.
func (c *compiler) Buf() []byte {
return c.buf
}
// BufPtr implements Compiler.BufPtr.
func (c *compiler) BufPtr() *[]byte {
return &c.buf
}
func (c *compiler) GetFunctionABI(sig *ssa.Signature) *FunctionABI {
if int(sig.ID) >= len(c.abis) {
c.abis = append(c.abis, make([]FunctionABI, int(sig.ID)+1)...)
}
abi := &c.abis[sig.ID]
if abi.Initialized {
return abi
}
abi.Init(sig, c.argResultInts, c.argResultFloats)
return abi
}
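Compile returns slices that alias the compiler's internal buffers (c.buf and c.relocations), and Init truncates those buffers before the next function, which is why the interface documentation asks callers to copy the results immediately. A hypothetical caller sketch, as if placed in package backend (compileAndCopy is not part of this diff):
func compileAndCopy(ctx context.Context, c Compiler) (body []byte, relocs []RelocationInfo, err error) {
	buf, rs, err := c.Compile(ctx) // Lower -> RegAlloc -> Finalize/Encode
	if err != nil {
		return nil, nil, err
	}
	body = append([]byte(nil), buf...)            // copy: Init reuses this buffer
	relocs = append([]RelocationInfo(nil), rs...) // same for the relocation records
	return body, relocs, nil
}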


@@ -0,0 +1,226 @@
package backend
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// Lower implements Compiler.Lower.
func (c *compiler) Lower() {
c.assignVirtualRegisters()
c.mach.SetCurrentABI(c.GetFunctionABI(c.ssaBuilder.Signature()))
c.mach.ExecutableContext().StartLoweringFunction(c.ssaBuilder.BlockIDMax())
c.lowerBlocks()
}
// lowerBlocks lowers each block in the ssa.Builder.
func (c *compiler) lowerBlocks() {
builder := c.ssaBuilder
for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() {
c.lowerBlock(blk)
}
ectx := c.mach.ExecutableContext()
// After lowering all blocks, we need to link adjacent blocks to layout one single instruction list.
var prev ssa.BasicBlock
for next := builder.BlockIteratorReversePostOrderBegin(); next != nil; next = builder.BlockIteratorReversePostOrderNext() {
if prev != nil {
ectx.LinkAdjacentBlocks(prev, next)
}
prev = next
}
}
func (c *compiler) lowerBlock(blk ssa.BasicBlock) {
mach := c.mach
ectx := mach.ExecutableContext()
ectx.StartBlock(blk)
// We traverse the instructions in reverse order because we might want to lower multiple
// instructions together.
cur := blk.Tail()
// First gather the branching instructions at the end of the blocks.
var br0, br1 *ssa.Instruction
if cur.IsBranching() {
br0 = cur
cur = cur.Prev()
if cur != nil && cur.IsBranching() {
br1 = cur
cur = cur.Prev()
}
}
if br0 != nil {
c.lowerBranches(br0, br1)
}
if br1 != nil && br0 == nil {
panic("BUG? when a block has conditional branch but doesn't end with an unconditional branch?")
}
// Now start lowering the non-branching instructions.
for ; cur != nil; cur = cur.Prev() {
c.setCurrentGroupID(cur.GroupID())
if cur.Lowered() {
continue
}
switch cur.Opcode() {
case ssa.OpcodeReturn:
rets := cur.ReturnVals()
if len(rets) > 0 {
c.mach.LowerReturns(rets)
}
c.mach.InsertReturn()
default:
mach.LowerInstr(cur)
}
ectx.FlushPendingInstructions()
}
// Finally, if this is the entry block, we have to insert copies of arguments from the real location to the VReg.
if blk.EntryBlock() {
c.lowerFunctionArguments(blk)
}
ectx.EndBlock()
}
// lowerBranches is called right after StartBlock and before any LowerInstr call if
// there are branches to the given block. br0 is the very end of the block and br1 is the one right before br0, if it exists.
// At least br0 is not nil, but br1 can be nil if there's no branching before br0.
//
// See ssa.Instruction IsBranching, and the comment on ssa.BasicBlock.
func (c *compiler) lowerBranches(br0, br1 *ssa.Instruction) {
ectx := c.mach.ExecutableContext()
c.setCurrentGroupID(br0.GroupID())
c.mach.LowerSingleBranch(br0)
ectx.FlushPendingInstructions()
if br1 != nil {
c.setCurrentGroupID(br1.GroupID())
c.mach.LowerConditionalBranch(br1)
ectx.FlushPendingInstructions()
}
if br0.Opcode() == ssa.OpcodeJump {
_, args, target := br0.BranchData()
argExists := len(args) != 0
if argExists && br1 != nil {
panic("BUG: critical edge split failed")
}
if argExists && target.ReturnBlock() {
if len(args) > 0 {
c.mach.LowerReturns(args)
}
} else if argExists {
c.lowerBlockArguments(args, target)
}
}
ectx.FlushPendingInstructions()
}
func (c *compiler) lowerFunctionArguments(entry ssa.BasicBlock) {
ectx := c.mach.ExecutableContext()
c.tmpVals = c.tmpVals[:0]
for i := 0; i < entry.Params(); i++ {
p := entry.Param(i)
if c.ssaValueRefCounts[p.ID()] > 0 {
c.tmpVals = append(c.tmpVals, p)
} else {
// If the argument is not used, we can just pass an invalid value.
c.tmpVals = append(c.tmpVals, ssa.ValueInvalid)
}
}
c.mach.LowerParams(c.tmpVals)
ectx.FlushPendingInstructions()
}
// lowerBlockArguments lowers how to pass arguments to the given successor block.
func (c *compiler) lowerBlockArguments(args []ssa.Value, succ ssa.BasicBlock) {
if len(args) != succ.Params() {
panic("BUG: mismatched number of arguments")
}
c.varEdges = c.varEdges[:0]
c.varEdgeTypes = c.varEdgeTypes[:0]
c.constEdges = c.constEdges[:0]
for i := 0; i < len(args); i++ {
dst := succ.Param(i)
src := args[i]
dstReg := c.VRegOf(dst)
srcDef := c.ssaValueDefinitions[src.ID()]
if srcDef.IsFromInstr() && srcDef.Instr.Constant() {
c.constEdges = append(c.constEdges, struct {
cInst *ssa.Instruction
dst regalloc.VReg
}{cInst: srcDef.Instr, dst: dstReg})
} else {
srcReg := c.VRegOf(src)
// Even when src == dst, insert the move so that such registers are kept alive.
c.varEdges = append(c.varEdges, [2]regalloc.VReg{srcReg, dstReg})
c.varEdgeTypes = append(c.varEdgeTypes, src.Type())
}
}
// Check if there's an overlap among the dsts and srcs in varEdges.
c.vRegIDs = c.vRegIDs[:0]
for _, edge := range c.varEdges {
src := edge[0].ID()
if int(src) >= len(c.vRegSet) {
c.vRegSet = append(c.vRegSet, make([]bool, src+1)...)
}
c.vRegSet[src] = true
c.vRegIDs = append(c.vRegIDs, src)
}
separated := true
for _, edge := range c.varEdges {
dst := edge[1].ID()
if int(dst) >= len(c.vRegSet) {
c.vRegSet = append(c.vRegSet, make([]bool, dst+1)...)
} else {
if c.vRegSet[dst] {
separated = false
break
}
}
}
for _, id := range c.vRegIDs {
c.vRegSet[id] = false // reset for the next use.
}
if separated {
// If there's no overlap, we can simply move the source to destination.
for i, edge := range c.varEdges {
src, dst := edge[0], edge[1]
c.mach.InsertMove(dst, src, c.varEdgeTypes[i])
}
} else {
// Otherwise, we break the overlap by going through temporary registers.
//
// First move all of the sources to temporary registers.
c.tempRegs = c.tempRegs[:0]
for i, edge := range c.varEdges {
src := edge[0]
typ := c.varEdgeTypes[i]
temp := c.AllocateVReg(typ)
c.tempRegs = append(c.tempRegs, temp)
c.mach.InsertMove(temp, src, typ)
}
// Then move the temporary registers to the destination.
for i, edge := range c.varEdges {
temp := c.tempRegs[i]
dst := edge[1]
c.mach.InsertMove(dst, temp, c.varEdgeTypes[i])
}
}
// Finally, move the constants.
for _, edge := range c.constEdges {
cInst, dst := edge.cInst, edge.dst
c.mach.InsertLoadConstantBlockArg(cInst, dst)
}
}
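
The overlap check above is a simple parallel-move resolution: when no destination register also appears as a source, the moves are emitted directly; otherwise every source is staged through a freshly allocated temporary first. The following standalone sketch shows the same decision on plain integer "registers"; the names are hypothetical and not the regalloc types.

package main

import "fmt"

// moveAll copies regs[srcs[i]] into regs[dsts[i]]. When a destination also
// appears as a source, direct moves could clobber a value that is still
// needed, so everything is staged through temporaries first.
func moveAll(regs map[int]int, srcs, dsts []int) {
    srcSet := make(map[int]bool, len(srcs))
    for _, s := range srcs {
        srcSet[s] = true
    }
    separated := true
    for _, d := range dsts {
        if srcSet[d] {
            separated = false
            break
        }
    }
    if separated {
        for i := range srcs {
            regs[dsts[i]] = regs[srcs[i]]
        }
        return
    }
    tmp := make([]int, len(srcs))
    for i, s := range srcs {
        tmp[i] = regs[s] // stage all sources first.
    }
    for i, d := range dsts {
        regs[d] = tmp[i]
    }
}

func main() {
    regs := map[int]int{1: 10, 2: 20}
    moveAll(regs, []int{1, 2}, []int{2, 1}) // swap: overlapping edges.
    fmt.Println(regs[1], regs[2])           // 20 10
}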

View File

@ -0,0 +1,219 @@
package backend
import (
"fmt"
"math"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
type ExecutableContext interface {
// StartLoweringFunction is called when the lowering of the given function is started.
// maximumBlockID is the maximum value of ssa.BasicBlockID existing in the function.
StartLoweringFunction(maximumBlockID ssa.BasicBlockID)
// LinkAdjacentBlocks is called after all blocks have been lowered in order to create one single instruction list.
LinkAdjacentBlocks(prev, next ssa.BasicBlock)
// StartBlock is called when the compilation of the given block is started.
// This is called in the reverse post-order of the ssa.BasicBlock(s), as we iterate with
// ssa.Builder's BlockIteratorReversePostOrderBegin and BlockIteratorReversePostOrderNext.
StartBlock(ssa.BasicBlock)
// EndBlock is called when the compilation of the current block is finished.
EndBlock()
// FlushPendingInstructions flushes the pending instructions to the buffer.
// This will be called after the lowering of each SSA Instruction.
FlushPendingInstructions()
}
type ExecutableContextT[Instr any] struct {
CurrentSSABlk ssa.BasicBlock
// InstructionPool is the pool used to allocate instructions.
InstructionPool wazevoapi.Pool[Instr]
asNop func(*Instr)
setNext func(*Instr, *Instr)
setPrev func(*Instr, *Instr)
// RootInstr is the root instruction of the executable.
RootInstr *Instr
labelPositionPool wazevoapi.Pool[LabelPosition[Instr]]
NextLabel Label
// LabelPositions maps a label to the instructions of the region which the label represents.
LabelPositions map[Label]*LabelPosition[Instr]
OrderedBlockLabels []*LabelPosition[Instr]
// PerBlockHead and PerBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock.
PerBlockHead, PerBlockEnd *Instr
// PendingInstructions are the instructions which are not yet emitted into the instruction list.
PendingInstructions []*Instr
// SsaBlockIDToLabels maps an SSA block ID to the label.
SsaBlockIDToLabels []Label
}
func NewExecutableContextT[Instr any](
resetInstruction func(*Instr),
setNext func(*Instr, *Instr),
setPrev func(*Instr, *Instr),
asNop func(*Instr),
) *ExecutableContextT[Instr] {
return &ExecutableContextT[Instr]{
InstructionPool: wazevoapi.NewPool[Instr](resetInstruction),
asNop: asNop,
setNext: setNext,
setPrev: setPrev,
labelPositionPool: wazevoapi.NewPool[LabelPosition[Instr]](resetLabelPosition[Instr]),
LabelPositions: make(map[Label]*LabelPosition[Instr]),
NextLabel: LabelInvalid,
}
}
func resetLabelPosition[T any](l *LabelPosition[T]) {
*l = LabelPosition[T]{}
}
// StartLoweringFunction implements ExecutableContext.
func (e *ExecutableContextT[Instr]) StartLoweringFunction(max ssa.BasicBlockID) {
imax := int(max)
if len(e.SsaBlockIDToLabels) <= imax {
// Eagerly allocate labels for the blocks since the underlying slice will be used for the next iteration.
e.SsaBlockIDToLabels = append(e.SsaBlockIDToLabels, make([]Label, imax+1)...)
}
}
func (e *ExecutableContextT[Instr]) StartBlock(blk ssa.BasicBlock) {
e.CurrentSSABlk = blk
l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()]
if l == LabelInvalid {
l = e.AllocateLabel()
e.SsaBlockIDToLabels[blk.ID()] = l
}
end := e.allocateNop0()
e.PerBlockHead, e.PerBlockEnd = end, end
labelPos, ok := e.LabelPositions[l]
if !ok {
labelPos = e.AllocateLabelPosition(l)
e.LabelPositions[l] = labelPos
}
e.OrderedBlockLabels = append(e.OrderedBlockLabels, labelPos)
labelPos.Begin, labelPos.End = end, end
labelPos.SB = blk
}
// EndBlock implements ExecutableContext.
func (e *ExecutableContextT[T]) EndBlock() {
// Insert a nop0 at the head of the block to simplify the logic of inserting instructions.
e.insertAtPerBlockHead(e.allocateNop0())
l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()]
e.LabelPositions[l].Begin = e.PerBlockHead
if e.CurrentSSABlk.EntryBlock() {
e.RootInstr = e.PerBlockHead
}
}
func (e *ExecutableContextT[T]) insertAtPerBlockHead(i *T) {
if e.PerBlockHead == nil {
e.PerBlockHead = i
e.PerBlockEnd = i
return
}
e.setNext(i, e.PerBlockHead)
e.setPrev(e.PerBlockHead, i)
e.PerBlockHead = i
}
// FlushPendingInstructions implements ExecutableContext.
func (e *ExecutableContextT[T]) FlushPendingInstructions() {
l := len(e.PendingInstructions)
if l == 0 {
return
}
for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order.
e.insertAtPerBlockHead(e.PendingInstructions[i])
}
e.PendingInstructions = e.PendingInstructions[:0]
}
func (e *ExecutableContextT[T]) Reset() {
e.labelPositionPool.Reset()
e.InstructionPool.Reset()
for l := Label(0); l <= e.NextLabel; l++ {
delete(e.LabelPositions, l)
}
e.PendingInstructions = e.PendingInstructions[:0]
e.OrderedBlockLabels = e.OrderedBlockLabels[:0]
e.RootInstr = nil
e.SsaBlockIDToLabels = e.SsaBlockIDToLabels[:0]
e.PerBlockHead, e.PerBlockEnd = nil, nil
e.NextLabel = LabelInvalid
}
// AllocateLabel allocates an unused label.
func (e *ExecutableContextT[T]) AllocateLabel() Label {
e.NextLabel++
return e.NextLabel
}
func (e *ExecutableContextT[T]) AllocateLabelPosition(la Label) *LabelPosition[T] {
l := e.labelPositionPool.Allocate()
l.L = la
return l
}
func (e *ExecutableContextT[T]) GetOrAllocateSSABlockLabel(blk ssa.BasicBlock) Label {
if blk.ReturnBlock() {
return LabelReturn
}
l := e.SsaBlockIDToLabels[blk.ID()]
if l == LabelInvalid {
l = e.AllocateLabel()
e.SsaBlockIDToLabels[blk.ID()] = l
}
return l
}
func (e *ExecutableContextT[T]) allocateNop0() *T {
i := e.InstructionPool.Allocate()
e.asNop(i)
return i
}
// LinkAdjacentBlocks implements backend.Machine.
func (e *ExecutableContextT[T]) LinkAdjacentBlocks(prev, next ssa.BasicBlock) {
prevLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(prev)]
nextLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(next)]
e.setNext(prevLabelPos.End, nextLabelPos.Begin)
}
// LabelPosition represents the regions of the generated code which the label represents.
type LabelPosition[Instr any] struct {
SB ssa.BasicBlock
L Label
Begin, End *Instr
BinaryOffset int64
}
// Label represents a position in the generated code which is either
// a real instruction or the constant pool (e.g. jump tables).
//
// This is exactly the same as the traditional "label" in assembly code.
type Label uint32
const (
LabelInvalid Label = 0
LabelReturn Label = math.MaxUint32
)
// String implements backend.Machine.
func (l Label) String() string {
return fmt.Sprintf("L%d", l)
}
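
Labels are plain integers with two sentinels: LabelInvalid (0, which is also the zero value of SsaBlockIDToLabels entries) and LabelReturn (MaxUint32). The following small sketch shows the get-or-allocate pattern used above, with hypothetical names.

package main

import (
    "fmt"
    "math"
)

type label uint32

const (
    labelInvalid label = 0
    labelReturn  label = math.MaxUint32
)

type allocator struct {
    next    label
    byBlock []label // the zero value of an entry means "not allocated yet".
}

// getOrAllocate returns the label for a block ID, allocating one lazily.
func (a *allocator) getOrAllocate(blockID int) label {
    if blockID >= len(a.byBlock) {
        a.byBlock = append(a.byBlock, make([]label, blockID+1-len(a.byBlock))...)
    }
    if a.byBlock[blockID] == labelInvalid {
        a.next++
        a.byBlock[blockID] = a.next
    }
    return a.byBlock[blockID]
}

func main() {
    var a allocator
    fmt.Println(a.getOrAllocate(2)) // 1
    fmt.Println(a.getOrAllocate(2)) // 1: cached
    fmt.Println(a.getOrAllocate(0)) // 2
    fmt.Println(labelReturn)        // 4294967295
}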

View File

@ -0,0 +1,33 @@
package backend
import "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
// GoFunctionCallRequiredStackSize returns the size of the stack required for the Go function call.
// argBegin is the index of the first argument in the signature which is neither the execution context nor the module context.
func GoFunctionCallRequiredStackSize(sig *ssa.Signature, argBegin int) (ret, retUnaligned int64) {
var paramNeededInBytes, resultNeededInBytes int64
for _, p := range sig.Params[argBegin:] {
s := int64(p.Size())
if s < 8 {
s = 8 // We use uint64 for all basic types, except SIMD v128.
}
paramNeededInBytes += s
}
for _, r := range sig.Results {
s := int64(r.Size())
if s < 8 {
s = 8 // We use uint64 for all basic types, except SIMD v128.
}
resultNeededInBytes += s
}
if paramNeededInBytes > resultNeededInBytes {
ret = paramNeededInBytes
} else {
ret = resultNeededInBytes
}
retUnaligned = ret
// Align to 16 bytes.
ret = (ret + 15) &^ 15
return
}
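
In other words, every parameter and result below 8 bytes is widened to 8 (only v128 needs 16), the larger of the two totals wins because the same region is reused for arguments and results, and the final size is rounded up to a 16-byte boundary. A quick worked sketch with made-up type sizes:

package main

import "fmt"

// requiredStackSize mirrors the sizing rule: widen anything below 8 bytes
// to 8, take max(params, results), then align up to 16 bytes.
func requiredStackSize(paramSizes, resultSizes []int64) (aligned, unaligned int64) {
    sum := func(sizes []int64) (n int64) {
        for _, s := range sizes {
            if s < 8 {
                s = 8
            }
            n += s
        }
        return
    }
    p, r := sum(paramSizes), sum(resultSizes)
    unaligned = p
    if r > p {
        unaligned = r
    }
    aligned = (unaligned + 15) &^ 15
    return
}

func main() {
    // e.g. (i32, i64, v128) -> (f64): params need 8+8+16=32 bytes, results 8.
    fmt.Println(requiredStackSize([]int64{4, 8, 16}, []int64{8})) // 32 32
    // (i32, i32, i32) -> (): 24 bytes unaligned, 32 aligned.
    fmt.Println(requiredStackSize([]int64{4, 4, 4}, nil)) // 32 24
}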

View File

@ -0,0 +1,186 @@
package amd64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// For the details of the ABI, see:
// https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#amd64-architecture
var (
intArgResultRegs = []regalloc.RealReg{rax, rbx, rcx, rdi, rsi, r8, r9, r10, r11}
floatArgResultRegs = []regalloc.RealReg{xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7}
)
var regInfo = &regalloc.RegisterInfo{
AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
regalloc.RegTypeInt: {
rax, rcx, rdx, rbx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15,
},
regalloc.RegTypeFloat: {
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
},
},
CalleeSavedRegisters: regalloc.NewRegSet(
rdx, r12, r13, r14, r15,
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
),
CallerSavedRegisters: regalloc.NewRegSet(
rax, rcx, rbx, rsi, rdi, r8, r9, r10, r11,
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
),
RealRegToVReg: []regalloc.VReg{
rax: raxVReg, rcx: rcxVReg, rdx: rdxVReg, rbx: rbxVReg, rsp: rspVReg, rbp: rbpVReg, rsi: rsiVReg, rdi: rdiVReg,
r8: r8VReg, r9: r9VReg, r10: r10VReg, r11: r11VReg, r12: r12VReg, r13: r13VReg, r14: r14VReg, r15: r15VReg,
xmm0: xmm0VReg, xmm1: xmm1VReg, xmm2: xmm2VReg, xmm3: xmm3VReg, xmm4: xmm4VReg, xmm5: xmm5VReg, xmm6: xmm6VReg,
xmm7: xmm7VReg, xmm8: xmm8VReg, xmm9: xmm9VReg, xmm10: xmm10VReg, xmm11: xmm11VReg, xmm12: xmm12VReg,
xmm13: xmm13VReg, xmm14: xmm14VReg, xmm15: xmm15VReg,
},
RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
RealRegType: func(r regalloc.RealReg) regalloc.RegType {
if r < xmm0 {
return regalloc.RegTypeInt
}
return regalloc.RegTypeFloat
},
}
// ArgsResultsRegs implements backend.Machine.
func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) {
return intArgResultRegs, floatArgResultRegs
}
// LowerParams implements backend.Machine.
func (m *machine) LowerParams(args []ssa.Value) {
a := m.currentABI
for i, ssaArg := range args {
if !ssaArg.Valid() {
continue
}
reg := m.c.VRegOf(ssaArg)
arg := &a.Args[i]
if arg.Kind == backend.ABIArgKindReg {
m.InsertMove(reg, arg.Reg, arg.Type)
} else {
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <-- RBP
// | ........... |
// | clobbered M |
// | ............ |
// | clobbered 0 |
// | spill slot N |
// | ........... |
// | spill slot 0 |
// RSP--> +-----------------+
// (low address)
// Load the value from the arg stack slot above the current RBP.
load := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmRBPReg(uint32(arg.Offset + 16)))
switch arg.Type {
case ssa.TypeI32:
load.asMovzxRmR(extModeLQ, mem, reg)
case ssa.TypeI64:
load.asMov64MR(mem, reg)
case ssa.TypeF32:
load.asXmmUnaryRmR(sseOpcodeMovss, mem, reg)
case ssa.TypeF64:
load.asXmmUnaryRmR(sseOpcodeMovsd, mem, reg)
case ssa.TypeV128:
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, reg)
default:
panic("BUG")
}
m.insert(load)
}
}
}
// LowerReturns implements backend.Machine.
func (m *machine) LowerReturns(rets []ssa.Value) {
// Load the XMM registers first, as they might need a temporary register to inline
// a constant return value.
a := m.currentABI
for i, ret := range rets {
r := &a.Rets[i]
if !r.Type.IsInt() {
m.LowerReturn(ret, r)
}
}
// Then load the GPR registers.
for i, ret := range rets {
r := &a.Rets[i]
if r.Type.IsInt() {
m.LowerReturn(ret, r)
}
}
}
func (m *machine) LowerReturn(ret ssa.Value, r *backend.ABIArg) {
reg := m.c.VRegOf(ret)
if def := m.c.ValueDefinition(ret); def.IsFromInstr() {
// Constant instructions are inlined.
if inst := def.Instr; inst.Constant() {
m.insertLoadConstant(inst, reg)
}
}
if r.Kind == backend.ABIArgKindReg {
m.InsertMove(r.Reg, reg, ret.Type())
} else {
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <-- RBP
// | ........... |
// | clobbered M |
// | ............ |
// | clobbered 0 |
// | spill slot N |
// | ........... |
// | spill slot 0 |
// RSP--> +-----------------+
// (low address)
// Store the value to the return stack slot above the current RBP.
store := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmRBPReg(uint32(m.currentABI.ArgStackSize + 16 + r.Offset)))
switch r.Type {
case ssa.TypeI32:
store.asMovRM(reg, mem, 4)
case ssa.TypeI64:
store.asMovRM(reg, mem, 8)
case ssa.TypeF32:
store.asXmmMovRM(sseOpcodeMovss, reg, mem)
case ssa.TypeF64:
store.asXmmMovRM(sseOpcodeMovsd, reg, mem)
case ssa.TypeV128:
store.asXmmMovRM(sseOpcodeMovdqu, reg, mem)
}
m.insert(store)
}
}
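
For a stack-passed result, the store address is computed relative to the caller frame: skip the argument area (ArgStackSize), then the saved Caller_RBP and the return address (8 bytes each), then add the result's own offset. A tiny arithmetic sketch with made-up sizes:

package main

import "fmt"

// returnSlotOffset computes the displacement from RBP at which a
// stack-passed result lives, following the frame layout in the diagram above.
func returnSlotOffset(argStackSize, resultOffset int64) int64 {
    const savedRBPAndRetAddr = 16 // Caller_RBP (8) + ReturnAddress (8)
    return argStackSize + savedRBPAndRetAddr + resultOffset
}

func main() {
    // e.g. 24 bytes of stack-passed args, second stack result at offset 8.
    fmt.Println(returnSlotOffset(24, 8)) // 48
}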

View File

@ -0,0 +1,9 @@
package amd64
// entrypoint enters the machine code generated by this backend which begins with the preamble generated by functionABI.EmitGoEntryPreamble below.
// This implements wazevo.entrypoint; see the comments there for details.
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr)
// afterGoFunctionCallEntrypoint enters the machine code after growing the stack.
// This implements wazevo.afterGoFunctionCallEntrypoint; see the comments there for details.
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)

View File

@ -0,0 +1,29 @@
#include "funcdata.h"
#include "textflag.h"
// entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr)
TEXT ·entrypoint(SB), NOSPLIT|NOFRAME, $0-48
MOVQ preambleExecutable+0(FP), R11
MOVQ functionExecutable+8(FP), R14
MOVQ executionContextPtr+16(FP), AX // First argument is passed in AX.
MOVQ moduleContextPtr+24(FP), BX // Second argument is passed in BX.
MOVQ paramResultSlicePtr+32(FP), R12
MOVQ goAllocatedStackSlicePtr+40(FP), R13
JMP R11
// afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)
TEXT ·afterGoFunctionCallEntrypoint(SB), NOSPLIT|NOFRAME, $0-32
MOVQ executable+0(FP), CX
MOVQ executionContextPtr+8(FP), AX // First argument is passed in AX.
// Save the stack pointer and frame pointer.
MOVQ BP, 16(AX) // 16 == ExecutionContextOffsetOriginalFramePointer
MOVQ SP, 24(AX) // 24 == ExecutionContextOffsetOriginalStackPointer
// Then set the stack pointer and frame pointer to the values we got from the Go runtime.
MOVQ framePointer+24(FP), BP
// WARNING: do not update SP before BP, because Go translates (FP) as (SP) + 8.
MOVQ stackPointer+16(FP), SP
JMP CX

View File

@ -0,0 +1,248 @@
package amd64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
var (
executionContextPtrReg = raxVReg
// The following are callee-saved registers. They can be used freely in the entry preamble
// since the preamble is called via a Go assembly function, which has a stack-based ABI.
// savedExecutionContextPtr also must be a callee-saved reg so that it can be used in the prologue and epilogue.
savedExecutionContextPtr = rdxVReg
// paramResultSlicePtr must match with entrypoint function in abi_entry_amd64.s.
paramResultSlicePtr = r12VReg
// goAllocatedStackPtr must match with entrypoint function in abi_entry_amd64.s.
goAllocatedStackPtr = r13VReg
// functionExecutable must match with entrypoint function in abi_entry_amd64.s.
functionExecutable = r14VReg
tmpIntReg = r15VReg
tmpXmmReg = xmm15VReg
)
// CompileEntryPreamble implements backend.Machine.
func (m *machine) CompileEntryPreamble(sig *ssa.Signature) []byte {
root := m.compileEntryPreamble(sig)
m.encodeWithoutSSA(root)
buf := m.c.Buf()
return buf
}
func (m *machine) compileEntryPreamble(sig *ssa.Signature) *instruction {
abi := backend.FunctionABI{}
abi.Init(sig, intArgResultRegs, floatArgResultRegs)
root := m.allocateNop()
//// ----------------------------------- prologue ----------------------------------- ////
// First, we save executionContextPtrReg into a callee-saved register so that it can be used in epilogue as well.
// mov %executionContextPtrReg, %savedExecutionContextPtr
cur := m.move64(executionContextPtrReg, savedExecutionContextPtr, root)
// Next is to save the original RBP and RSP into the execution context.
cur = m.saveOriginalRSPRBP(cur)
// Now set the RSP to the Go-allocated stack pointer.
// mov %goAllocatedStackPtr, %rsp
cur = m.move64(goAllocatedStackPtr, rspVReg, cur)
if stackSlotSize := abi.AlignedArgResultStackSlotSize(); stackSlotSize > 0 {
// Allocate stack slots for the arguments and return values.
// sub $stackSlotSize, %rsp
spDec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(stackSlotSize)), rspVReg, true)
cur = linkInstr(cur, spDec)
}
var offset uint32
for i := range abi.Args {
if i < 2 {
// module context ptr and execution context ptr are passed in rax and rbx by the Go assembly function.
continue
}
arg := &abi.Args[i]
cur = m.goEntryPreamblePassArg(cur, paramResultSlicePtr, offset, arg)
if arg.Type == ssa.TypeV128 {
offset += 16
} else {
offset += 8
}
}
// Zero out RBP so that the unwind/stack growth code can correctly detect the end of the stack.
zerosRbp := m.allocateInstr().asAluRmiR(aluRmiROpcodeXor, newOperandReg(rbpVReg), rbpVReg, true)
cur = linkInstr(cur, zerosRbp)
// Now we are ready to call the real function. Note that at this point the stack pointer is already set to the
// Go-allocated stack, which is aligned to 16 bytes.
call := m.allocateInstr().asCallIndirect(newOperandReg(functionExecutable), &abi)
cur = linkInstr(cur, call)
//// ----------------------------------- epilogue ----------------------------------- ////
// Read the results from regs and the stack, and set them correctly into the paramResultSlicePtr.
offset = 0
for i := range abi.Rets {
r := &abi.Rets[i]
cur = m.goEntryPreamblePassResult(cur, paramResultSlicePtr, offset, r, uint32(abi.ArgStackSize))
if r.Type == ssa.TypeV128 {
offset += 16
} else {
offset += 8
}
}
// Finally, restore the original RBP and RSP.
cur = m.restoreOriginalRSPRBP(cur)
ret := m.allocateInstr().asRet()
linkInstr(cur, ret)
return root
}
// saveOriginalRSPRBP saves the original RSP and RBP into the execution context.
func (m *machine) saveOriginalRSPRBP(cur *instruction) *instruction {
// mov %rbp, wazevoapi.ExecutionContextOffsetOriginalFramePointer(%executionContextPtrReg)
// mov %rsp, wazevoapi.ExecutionContextOffsetOriginalStackPointer(%executionContextPtrReg)
cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, true, cur)
cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, true, cur)
return cur
}
// restoreOriginalRSPRBP restores the original RSP and RBP from the execution context.
func (m *machine) restoreOriginalRSPRBP(cur *instruction) *instruction {
// mov wazevoapi.ExecutionContextOffsetOriginalFramePointer(%executionContextPtrReg), %rbp
// mov wazevoapi.ExecutionContextOffsetOriginalStackPointer(%executionContextPtrReg), %rsp
cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, false, cur)
cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, false, cur)
return cur
}
func (m *machine) move64(src, dst regalloc.VReg, prev *instruction) *instruction {
mov := m.allocateInstr().asMovRR(src, dst, true)
return linkInstr(prev, mov)
}
func (m *machine) loadOrStore64AtExecutionCtx(execCtx regalloc.VReg, offset wazevoapi.Offset, r regalloc.VReg, store bool, prev *instruction) *instruction {
mem := newOperandMem(m.newAmodeImmReg(offset.U32(), execCtx))
instr := m.allocateInstr()
if store {
instr.asMovRM(r, mem, 8)
} else {
instr.asMov64MR(mem, r)
}
return linkInstr(prev, instr)
}
// This is for debugging.
func (m *machine) linkUD2(cur *instruction) *instruction { //nolint
return linkInstr(cur, m.allocateInstr().asUD2())
}
func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regalloc.VReg, offsetInParamSlice uint32, arg *backend.ABIArg) *instruction {
var dst regalloc.VReg
argTyp := arg.Type
if arg.Kind == backend.ABIArgKindStack {
// Stack-passed arguments are first loaded into a temporary register, then stored into their stack slots.
switch argTyp {
case ssa.TypeI32, ssa.TypeI64:
dst = tmpIntReg
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
dst = tmpXmmReg
default:
panic("BUG")
}
} else {
dst = arg.Reg
}
load := m.allocateInstr()
a := newOperandMem(m.newAmodeImmReg(offsetInParamSlice, paramSlicePtr))
switch arg.Type {
case ssa.TypeI32:
load.asMovzxRmR(extModeLQ, a, dst)
case ssa.TypeI64:
load.asMov64MR(a, dst)
case ssa.TypeF32:
load.asXmmUnaryRmR(sseOpcodeMovss, a, dst)
case ssa.TypeF64:
load.asXmmUnaryRmR(sseOpcodeMovsd, a, dst)
case ssa.TypeV128:
load.asXmmUnaryRmR(sseOpcodeMovdqu, a, dst)
}
cur = linkInstr(cur, load)
if arg.Kind == backend.ABIArgKindStack {
// Store back to the stack.
store := m.allocateInstr()
a := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset), rspVReg))
switch arg.Type {
case ssa.TypeI32:
store.asMovRM(dst, a, 4)
case ssa.TypeI64:
store.asMovRM(dst, a, 8)
case ssa.TypeF32:
store.asXmmMovRM(sseOpcodeMovss, dst, a)
case ssa.TypeF64:
store.asXmmMovRM(sseOpcodeMovsd, dst, a)
case ssa.TypeV128:
store.asXmmMovRM(sseOpcodeMovdqu, dst, a)
}
cur = linkInstr(cur, store)
}
return cur
}
func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr regalloc.VReg, offsetInResultSlice uint32, result *backend.ABIArg, resultStackSlotBeginOffset uint32) *instruction {
var r regalloc.VReg
if result.Kind == backend.ABIArgKindStack {
// Load the value to the temporary.
load := m.allocateInstr()
offset := resultStackSlotBeginOffset + uint32(result.Offset)
a := newOperandMem(m.newAmodeImmReg(offset, rspVReg))
switch result.Type {
case ssa.TypeI32:
r = tmpIntReg
load.asMovzxRmR(extModeLQ, a, r)
case ssa.TypeI64:
r = tmpIntReg
load.asMov64MR(a, r)
case ssa.TypeF32:
r = tmpXmmReg
load.asXmmUnaryRmR(sseOpcodeMovss, a, r)
case ssa.TypeF64:
r = tmpXmmReg
load.asXmmUnaryRmR(sseOpcodeMovsd, a, r)
case ssa.TypeV128:
r = tmpXmmReg
load.asXmmUnaryRmR(sseOpcodeMovdqu, a, r)
default:
panic("BUG")
}
cur = linkInstr(cur, load)
} else {
r = result.Reg
}
store := m.allocateInstr()
a := newOperandMem(m.newAmodeImmReg(offsetInResultSlice, resultSlicePtr))
switch result.Type {
case ssa.TypeI32:
store.asMovRM(r, a, 4)
case ssa.TypeI64:
store.asMovRM(r, a, 8)
case ssa.TypeF32:
store.asXmmMovRM(sseOpcodeMovss, r, a)
case ssa.TypeF64:
store.asXmmMovRM(sseOpcodeMovsd, r, a)
case ssa.TypeV128:
store.asXmmMovRM(sseOpcodeMovdqu, r, a)
}
return linkInstr(cur, store)
}
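
The entry preamble walks the flat parameter/result slice with a running byte offset: every scalar occupies one 8-byte slot and v128 occupies two. The following sketch computes those offsets for a hypothetical signature.

package main

import "fmt"

// sliceOffsets returns the byte offset of each value in the flat
// parameter/result slice, where isV128[i] marks 16-byte vector values.
func sliceOffsets(isV128 []bool) []uint32 {
    offsets := make([]uint32, len(isV128))
    var off uint32
    for i, v := range isV128 {
        offsets[i] = off
        if v {
            off += 16
        } else {
            off += 8
        }
    }
    return offsets
}

func main() {
    // e.g. (i64, v128, f32): offsets 0, 8, 24.
    fmt.Println(sliceOffsets([]bool{false, true, false})) // [0 8 24]
}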

View File

@ -0,0 +1,443 @@
package amd64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
var calleeSavedVRegs = []regalloc.VReg{
rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
}
// CompileGoFunctionTrampoline implements backend.Machine.
func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte {
ectx := m.ectx
argBegin := 1 // Skips exec context by default.
if needModuleContextPtr {
argBegin++
}
abi := &backend.FunctionABI{}
abi.Init(sig, intArgResultRegs, floatArgResultRegs)
m.currentABI = abi
cur := m.allocateNop()
ectx.RootInstr = cur
// Execution context is always the first argument.
execCtrPtr := raxVReg
// First we update RBP and RSP just like the normal prologue.
//
// (high address) (high address)
// RBP ----> +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | ====> | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | Return Addr | | Return Addr |
// RSP ----> +-----------------+ | Caller_RBP |
// (low address) +-----------------+ <----- RSP, RBP
//
cur = m.setupRBPRSP(cur)
goSliceSizeAligned, goSliceSizeAlignedUnaligned := backend.GoFunctionCallRequiredStackSize(sig, argBegin)
cur = m.insertStackBoundsCheck(goSliceSizeAligned+8 /* size of the Go slice */, cur)
// Save the callee saved registers.
cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs)
if needModuleContextPtr {
moduleCtrPtr := rbxVReg // Module context is always the second argument.
mem := m.newAmodeImmReg(
wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.U32(),
execCtrPtr)
store := m.allocateInstr().asMovRM(moduleCtrPtr, newOperandMem(mem), 8)
cur = linkInstr(cur, store)
}
// Now let's advance the RSP to the stack slot for the arguments.
//
// (high address) (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | =======> | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | Return Addr | | Return Addr |
// | Caller_RBP | | Caller_RBP |
// RBP,RSP --> +-----------------+ +-----------------+ <----- RBP
// (low address) | arg[N]/ret[M] |
// | .......... |
// | arg[1]/ret[1] |
// | arg[0]/ret[0] |
// +-----------------+ <----- RSP
// (low address)
//
// where the region "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go function,
// and will therefore be accessed as a usual []uint64. That is where we pass arguments to,
// and receive return values from, the Go function.
cur = m.addRSP(-int32(goSliceSizeAligned), cur)
// Next, we need to store all the arguments to the stack in the typical Wasm stack style.
var offsetInGoSlice int32
for i := range abi.Args[argBegin:] {
arg := &abi.Args[argBegin+i]
var v regalloc.VReg
if arg.Kind == backend.ABIArgKindReg {
v = arg.Reg
} else {
// The callee-saved registers have already been saved above, so we can use them as temporaries.
if arg.Type.IsInt() {
v = r15VReg
} else {
v = xmm15VReg
}
mem := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
load := m.allocateInstr()
switch arg.Type {
case ssa.TypeI32:
load.asMovzxRmR(extModeLQ, mem, v)
case ssa.TypeI64:
load.asMov64MR(mem, v)
case ssa.TypeF32:
load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
case ssa.TypeF64:
load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
case ssa.TypeV128:
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
default:
panic("BUG")
}
cur = linkInstr(cur, load)
}
store := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
switch arg.Type {
case ssa.TypeI32:
store.asMovRM(v, mem, 4)
offsetInGoSlice += 8 // always uint64 rep.
case ssa.TypeI64:
store.asMovRM(v, mem, 8)
offsetInGoSlice += 8
case ssa.TypeF32:
store.asXmmMovRM(sseOpcodeMovss, v, mem)
offsetInGoSlice += 8 // always uint64 rep.
case ssa.TypeF64:
store.asXmmMovRM(sseOpcodeMovsd, v, mem)
offsetInGoSlice += 8
case ssa.TypeV128:
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
offsetInGoSlice += 16
default:
panic("BUG")
}
cur = linkInstr(cur, store)
}
// Finally we push the size of the slice to the stack so the stack looks like:
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | Return Addr |
// | Caller_RBP |
// +-----------------+ <----- RBP
// | arg[N]/ret[M] |
// | .......... |
// | arg[1]/ret[1] |
// | arg[0]/ret[0] |
// | slice size |
// +-----------------+ <----- RSP
// (low address)
//
// push $sliceSize
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandImm32(uint32(goSliceSizeAlignedUnaligned))))
// Load the exitCode to the register.
exitCodeReg := r12VReg // Callee saved which is already saved.
cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(exitCode), false))
saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg)
cur = linkInstr(cur, setExitCode)
cur = linkInstr(cur, saveRsp)
cur = linkInstr(cur, saveRbp)
// Ready to exit the execution.
cur = m.storeReturnAddressAndExit(cur, execCtrPtr)
// We don't need the slice size anymore, so pop it.
cur = m.addRSP(8, cur)
// Ready to set up the results.
offsetInGoSlice = 0
// To avoid the result overwriting the execution context pointer, we track the offset here
// and defer loading that result until the end of this function.
var argOverlapWithExecCtxOffset int32 = -1
for i := range abi.Rets {
r := &abi.Rets[i]
var v regalloc.VReg
isRegResult := r.Kind == backend.ABIArgKindReg
if isRegResult {
v = r.Reg
if v.RealReg() == execCtrPtr.RealReg() {
argOverlapWithExecCtxOffset = offsetInGoSlice
offsetInGoSlice += 8 // always uint64 rep.
continue
}
} else {
if r.Type.IsInt() {
v = r15VReg
} else {
v = xmm15VReg
}
}
load := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
switch r.Type {
case ssa.TypeI32:
load.asMovzxRmR(extModeLQ, mem, v)
offsetInGoSlice += 8 // always uint64 rep.
case ssa.TypeI64:
load.asMov64MR(mem, v)
offsetInGoSlice += 8
case ssa.TypeF32:
load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
offsetInGoSlice += 8 // always uint64 rep.
case ssa.TypeF64:
load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
offsetInGoSlice += 8
case ssa.TypeV128:
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
offsetInGoSlice += 16
default:
panic("BUG")
}
cur = linkInstr(cur, load)
if !isRegResult {
// We need to store it back to the result slot above rbp.
store := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmReg(uint32(abi.ArgStackSize+r.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
switch r.Type {
case ssa.TypeI32:
store.asMovRM(v, mem, 4)
case ssa.TypeI64:
store.asMovRM(v, mem, 8)
case ssa.TypeF32:
store.asXmmMovRM(sseOpcodeMovss, v, mem)
case ssa.TypeF64:
store.asXmmMovRM(sseOpcodeMovsd, v, mem)
case ssa.TypeV128:
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
default:
panic("BUG")
}
cur = linkInstr(cur, store)
}
}
// Before return, we need to restore the callee saved registers.
cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs)
if argOverlapWithExecCtxOffset >= 0 {
// At this point execCtrPtr is not used anymore, so we can finally store the
// result into the register that overlaps with the execution context pointer.
mem := newOperandMem(m.newAmodeImmReg(uint32(argOverlapWithExecCtxOffset), rspVReg))
load := m.allocateInstr().asMov64MR(mem, execCtrPtr)
cur = linkInstr(cur, load)
}
// Finally ready to return.
cur = m.revertRBPRSP(cur)
linkInstr(cur, m.allocateInstr().asRet())
m.encodeWithoutSSA(ectx.RootInstr)
return m.c.Buf()
}
func (m *machine) saveRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
for _, v := range regs {
store := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx))
switch v.RegType() {
case regalloc.RegTypeInt:
store.asMovRM(v, mem, 8)
case regalloc.RegTypeFloat:
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
default:
panic("BUG")
}
cur = linkInstr(cur, store)
offset += 16 // See the execution context struct: each register is stored in a 16-byte slot unconditionally.
}
return cur
}
func (m *machine) restoreRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
for _, v := range regs {
load := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx))
switch v.RegType() {
case regalloc.RegTypeInt:
load.asMov64MR(mem, v)
case regalloc.RegTypeFloat:
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
default:
panic("BUG")
}
cur = linkInstr(cur, load)
offset += 16 // See the execution context struct: each register is stored in a 16-byte slot unconditionally.
}
return cur
}
func (m *machine) storeReturnAddressAndExit(cur *instruction, execCtx regalloc.VReg) *instruction {
readRip := m.allocateInstr()
cur = linkInstr(cur, readRip)
ripReg := r12VReg // Callee saved which is already saved.
saveRip := m.allocateInstr().asMovRM(
ripReg,
newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetGoCallReturnAddress.U32(), execCtx)),
8,
)
cur = linkInstr(cur, saveRip)
exit := m.allocateExitSeq(execCtx)
cur = linkInstr(cur, exit)
nop, l := m.allocateBrTarget()
cur = linkInstr(cur, nop)
readRip.asLEA(newOperandLabel(l), ripReg)
return cur
}
// stackGrowSaveVRegs is the set of registers that must be saved/restored while growing the stack when there's insufficient
// stack space left. Basically this is all the allocatable registers except for RSP, RBP, and RAX, which contains the
// execution context pointer. The ExecCtx pointer is always the first argument, so we don't need to save it.
var stackGrowSaveVRegs = []regalloc.VReg{
rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
rcxVReg, rbxVReg, rsiVReg, rdiVReg, r8VReg, r9VReg, r10VReg, r11VReg,
xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
xmm0VReg, xmm1VReg, xmm2VReg, xmm3VReg, xmm4VReg, xmm5VReg, xmm6VReg, xmm7VReg,
}
// CompileStackGrowCallSequence implements backend.Machine.
func (m *machine) CompileStackGrowCallSequence() []byte {
ectx := m.ectx
cur := m.allocateNop()
ectx.RootInstr = cur
cur = m.setupRBPRSP(cur)
// Execution context is always the first argument.
execCtrPtr := raxVReg
// Save the callee saved and argument registers.
cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs)
// Load the exitCode to the register.
exitCodeReg := r12VReg // Already saved.
cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(wazevoapi.ExitCodeGrowStack), false))
saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg)
cur = linkInstr(cur, setExitCode)
cur = linkInstr(cur, saveRsp)
cur = linkInstr(cur, saveRbp)
// Ready to exit the execution.
cur = m.storeReturnAddressAndExit(cur, execCtrPtr)
// After the exit, restore the saved registers.
cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs)
// Finally ready to return.
cur = m.revertRBPRSP(cur)
linkInstr(cur, m.allocateInstr().asRet())
m.encodeWithoutSSA(ectx.RootInstr)
return m.c.Buf()
}
// insertStackBoundsCheck will insert the instructions after `cur` to check the
// stack bounds, and if there's no sufficient spaces required for the function,
// exit the execution and try growing it in Go world.
func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction {
// add $requiredStackSize, %rsp ;; Temporarily update the sp.
// cmp ExecutionContextOffsetStackBottomPtr(%rax), %rsp ;; Compare the stack bottom and the sp.
// ja .ok
// sub $requiredStackSize, %rsp ;; Reverse the temporary update.
// pushq r15 ;; save the temporary.
// mov $requiredStackSize, %r15
// mov %r15, ExecutionContextOffsetStackGrowRequiredSize(%rax) ;; Set the required size in the execution context.
// popq r15 ;; restore the temporary.
// callq *ExecutionContextOffsetStackGrowCallTrampolineAddress(%rax) ;; Call the Go function to grow the stack.
// jmp .cont
// .ok:
// sub $requiredStackSize, %rsp ;; Reverse the temporary update.
// .cont:
cur = m.addRSP(-int32(requiredStackSize), cur)
cur = linkInstr(cur, m.allocateInstr().asCmpRmiR(true,
newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackBottomPtr.U32(), raxVReg)),
rspVReg, true))
ja := m.allocateInstr()
cur = linkInstr(cur, ja)
cur = m.addRSP(int32(requiredStackSize), cur)
// Save the temporary.
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r15VReg)))
// Load the required size to the temporary.
cur = linkInstr(cur, m.allocateInstr().asImm(r15VReg, uint64(requiredStackSize), true))
// Set the required size in the execution context.
cur = linkInstr(cur, m.allocateInstr().asMovRM(r15VReg,
newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.U32(), raxVReg)), 8))
// Restore the temporary.
cur = linkInstr(cur, m.allocateInstr().asPop64(r15VReg))
// Call the Go function to grow the stack.
cur = linkInstr(cur, m.allocateInstr().asCallIndirect(newOperandMem(m.newAmodeImmReg(
wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.U32(), raxVReg)), nil))
// Jump to the continuation.
jmpToCont := m.allocateInstr()
cur = linkInstr(cur, jmpToCont)
// .ok:
okInstr, ok := m.allocateBrTarget()
cur = linkInstr(cur, okInstr)
ja.asJmpIf(condNBE, newOperandLabel(ok))
// On the ok path, we only need to reverse the temporary update.
cur = m.addRSP(int32(requiredStackSize), cur)
// .cont:
contInstr, cont := m.allocateBrTarget()
cur = linkInstr(cur, contInstr)
jmpToCont.asJmp(newOperandLabel(cont))
return cur
}
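
The emitted sequence temporarily moves RSP down by the required size, compares it against the recorded stack bottom, and only takes the grow path when the check fails. The same control flow is shown in plain Go below, purely as an illustration of the logic; the real check is emitted as machine code, and the field names are stand-ins.

package main

import "fmt"

type execContext struct {
    stackBottom       uintptr
    growRequiredSize  uint64
    growCallsObserved int
}

// checkAndMaybeGrow models the emitted sequence: if sp minus the required
// size would dip below the stack bottom, record the required size and call
// the grow hook (modeled here as a counter).
func checkAndMaybeGrow(ctx *execContext, sp uintptr, required uint64) {
    if sp-uintptr(required) > ctx.stackBottom { // "ja .ok"
        return // enough room, nothing to do.
    }
    ctx.growRequiredSize = required // stash for the Go-side grower.
    ctx.growCallsObserved++         // stand-in for the trampoline call.
}

func main() {
    ctx := &execContext{stackBottom: 0x1000}
    checkAndMaybeGrow(ctx, 0x2000, 256) // plenty of room: no grow.
    checkAndMaybeGrow(ctx, 0x1080, 256) // would underflow: grow requested.
    fmt.Println(ctx.growCallsObserved, ctx.growRequiredSize) // 1 256
}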

View File

@ -0,0 +1,168 @@
package amd64
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
type cond byte
const (
// condO represents (overflow) condition.
condO cond = iota
// condNO represents (no overflow) condition.
condNO
// condB represents (< unsigned) condition.
condB
// condNB represents (>= unsigned) condition.
condNB
// condZ represents (zero) condition.
condZ
// condNZ represents (not-zero) condition.
condNZ
// condBE represents (<= unsigned) condition.
condBE
// condNBE represents (> unsigned) condition.
condNBE
// condS represents (negative) condition.
condS
// condNS represents (not-negative) condition.
condNS
// condP represents (parity) condition.
condP
// condNP represents (not parity) condition.
condNP
// condL represents (< signed) condition.
condL
// condNL represents (>= signed) condition.
condNL
// condLE represents (<= signed) condition.
condLE
// condNLE represents (> signed) condition.
condNLE
condInvalid
)
func (c cond) String() string {
switch c {
case condO:
return "o"
case condNO:
return "no"
case condB:
return "b"
case condNB:
return "nb"
case condZ:
return "z"
case condNZ:
return "nz"
case condBE:
return "be"
case condNBE:
return "nbe"
case condS:
return "s"
case condNS:
return "ns"
case condL:
return "l"
case condNL:
return "nl"
case condLE:
return "le"
case condNLE:
return "nle"
case condP:
return "p"
case condNP:
return "np"
default:
panic("unreachable")
}
}
func condFromSSAIntCmpCond(origin ssa.IntegerCmpCond) cond {
switch origin {
case ssa.IntegerCmpCondEqual:
return condZ
case ssa.IntegerCmpCondNotEqual:
return condNZ
case ssa.IntegerCmpCondSignedLessThan:
return condL
case ssa.IntegerCmpCondSignedGreaterThanOrEqual:
return condNL
case ssa.IntegerCmpCondSignedGreaterThan:
return condNLE
case ssa.IntegerCmpCondSignedLessThanOrEqual:
return condLE
case ssa.IntegerCmpCondUnsignedLessThan:
return condB
case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual:
return condNB
case ssa.IntegerCmpCondUnsignedGreaterThan:
return condNBE
case ssa.IntegerCmpCondUnsignedLessThanOrEqual:
return condBE
default:
panic("unreachable")
}
}
func condFromSSAFloatCmpCond(origin ssa.FloatCmpCond) cond {
switch origin {
case ssa.FloatCmpCondGreaterThanOrEqual:
return condNB
case ssa.FloatCmpCondGreaterThan:
return condNBE
case ssa.FloatCmpCondEqual, ssa.FloatCmpCondNotEqual, ssa.FloatCmpCondLessThan, ssa.FloatCmpCondLessThanOrEqual:
panic(fmt.Sprintf("cond %s must be treated as a special case", origin))
default:
panic("unreachable")
}
}
func (c cond) encoding() byte {
return byte(c)
}
func (c cond) invert() cond {
switch c {
case condO:
return condNO
case condNO:
return condO
case condB:
return condNB
case condNB:
return condB
case condZ:
return condNZ
case condNZ:
return condZ
case condBE:
return condNBE
case condNBE:
return condBE
case condS:
return condNS
case condNS:
return condS
case condP:
return condNP
case condNP:
return condP
case condL:
return condNL
case condNL:
return condL
case condLE:
return condNLE
case condNLE:
return condLE
default:
panic("unreachable")
}
}

View File

@ -0,0 +1,35 @@
package amd64
// extMode represents the mode of extension in movzx/movsx.
type extMode byte
const (
// extModeBL represents Byte -> Longword.
extModeBL extMode = iota
// extModeBQ represents Byte -> Quadword.
extModeBQ
// extModeWL represents Word -> Longword.
extModeWL
// extModeWQ represents Word -> Quadword.
extModeWQ
// extModeLQ represents Longword -> Quadword.
extModeLQ
)
// String implements fmt.Stringer.
func (e extMode) String() string {
switch e {
case extModeBL:
return "bl"
case extModeBQ:
return "bq"
case extModeWL:
return "wl"
case extModeWQ:
return "wq"
case extModeLQ:
return "lq"
default:
panic("BUG: invalid ext mode")
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,71 @@
package amd64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// lowerConstant allocates a new VReg and inserts the instruction to load the constant value.
func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
val := instr.Return()
valType := val.Type()
vr = m.c.AllocateVReg(valType)
m.insertLoadConstant(instr, vr)
return
}
// InsertLoadConstantBlockArg implements backend.Machine.
func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) {
m.insertLoadConstant(instr, vr)
}
func (m *machine) insertLoadConstant(instr *ssa.Instruction, vr regalloc.VReg) {
val := instr.Return()
valType := val.Type()
v := instr.ConstantVal()
bits := valType.Bits()
if bits < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc.
v = v & ((1 << valType.Bits()) - 1)
}
switch valType {
case ssa.TypeF32, ssa.TypeF64:
m.lowerFconst(vr, v, bits == 64)
case ssa.TypeI32, ssa.TypeI64:
m.lowerIconst(vr, v, bits == 64)
default:
panic("BUG")
}
}
func (m *machine) lowerFconst(dst regalloc.VReg, c uint64, _64 bool) {
if c == 0 {
xor := m.allocateInstr().asZeros(dst)
m.insert(xor)
} else {
var tmpType ssa.Type
if _64 {
tmpType = ssa.TypeI64
} else {
tmpType = ssa.TypeI32
}
tmpInt := m.c.AllocateVReg(tmpType)
loadToGP := m.allocateInstr().asImm(tmpInt, c, _64)
m.insert(loadToGP)
movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpInt), dst, _64)
m.insert(movToXmm)
}
}
func (m *machine) lowerIconst(dst regalloc.VReg, c uint64, _64 bool) {
i := m.allocateInstr()
if c == 0 {
i.asZeros(dst)
} else {
i.asImm(dst, c, _64)
}
m.insert(i)
}
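
Constant lowering first masks away any bits above the value's width, then picks the zeroing idiom for zero and an immediate move otherwise. A small sketch of that masking and decision, with illustrative names:

package main

import "fmt"

// materializeConst mimics the decision above: mask to the type width, then
// report whether a zeroing idiom (xor reg, reg) or an immediate load is used.
func materializeConst(v uint64, bits uint) (masked uint64, useZeroIdiom bool) {
    if bits < 64 {
        v &= (1 << bits) - 1 // drop accidental sign-extension bits.
    }
    return v, v == 0
}

func main() {
    fmt.Println(materializeConst(0xffffffff00000000, 32)) // 0 true: xor idiom
    fmt.Println(materializeConst(42, 64))                 // 42 false: mov imm
}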

View File

@ -0,0 +1,187 @@
package amd64
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
var addendsMatchOpcodes = [...]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst, ssa.OpcodeIshl}
type addend struct {
r regalloc.VReg
off int64
shift byte
}
func (a addend) String() string {
return fmt.Sprintf("addend{r=%s, off=%d, shift=%d}", a.r, a.off, a.shift)
}
// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32) (am *amode) {
def := m.c.ValueDefinition(ptr)
if offsetBase&0x80000000 != 0 {
// Special-case a huge base offset whose MSB is set. On x64, the immediate is always
// sign-extended, but our IR semantics require the offset base to always be unsigned.
// Note that this should be extremely rare, or should never even be hit by a real application,
// so we don't bother optimizing this case.
a := m.lowerAddend(def)
off64 := a.off + int64(offsetBase)
offsetBaseReg := m.c.AllocateVReg(ssa.TypeI64)
m.lowerIconst(offsetBaseReg, uint64(off64), true)
if a.r != regalloc.VRegInvalid {
return m.newAmodeRegRegShift(0, offsetBaseReg, a.r, a.shift)
} else {
return m.newAmodeImmReg(0, offsetBaseReg)
}
}
if op := m.c.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op == ssa.OpcodeIadd {
add := def.Instr
x, y := add.Arg2()
xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y)
ax := m.lowerAddend(xDef)
ay := m.lowerAddend(yDef)
add.MarkLowered()
return m.lowerAddendsToAmode(ax, ay, offsetBase)
} else {
// If it is not an Iadd, we lower the single addend.
a := m.lowerAddend(def)
// off is always 0 if r is valid.
if a.r != regalloc.VRegInvalid {
if a.shift != 0 {
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
m.lowerIconst(tmpReg, 0, true)
return m.newAmodeRegRegShift(offsetBase, tmpReg, a.r, a.shift)
}
return m.newAmodeImmReg(offsetBase, a.r)
} else {
off64 := a.off + int64(offsetBase)
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
m.lowerIconst(tmpReg, uint64(off64), true)
return m.newAmodeImmReg(0, tmpReg)
}
}
}
func (m *machine) lowerAddendsToAmode(x, y addend, offBase uint32) *amode {
if x.r != regalloc.VRegInvalid && x.off != 0 || y.r != regalloc.VRegInvalid && y.off != 0 {
panic("invalid input")
}
u64 := uint64(x.off+y.off) + uint64(offBase)
if u64 != 0 {
if _, ok := asImm32(u64, false); !ok {
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
m.lowerIconst(tmpReg, u64, true)
// Blank u64 as it has been already lowered.
u64 = 0
if x.r == regalloc.VRegInvalid {
x.r = tmpReg
} else if y.r == regalloc.VRegInvalid {
y.r = tmpReg
} else {
// We expected either x.r or y.r to be invalid so that the temporary register
// could take its place; reaching here means both are valid, which is a bug.
panic("BUG")
}
}
}
u32 := uint32(u64)
switch {
// We assume rx, ry are valid iff offx, offy are 0.
case x.r != regalloc.VRegInvalid && y.r != regalloc.VRegInvalid:
switch {
case x.shift != 0 && y.shift != 0:
// Cannot absorb two shifted registers, must lower one to a shift instruction.
shifted := m.allocateInstr()
shifted.asShiftR(shiftROpShiftLeft, newOperandImm32(uint32(x.shift)), x.r, true)
m.insert(shifted)
return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift)
case x.shift != 0 && y.shift == 0:
// Swap base and index.
x, y = y, x
fallthrough
default:
return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift)
}
case x.r == regalloc.VRegInvalid && y.r != regalloc.VRegInvalid:
x, y = y, x
fallthrough
case x.r != regalloc.VRegInvalid && y.r == regalloc.VRegInvalid:
if x.shift != 0 {
zero := m.c.AllocateVReg(ssa.TypeI64)
m.lowerIconst(zero, 0, true)
return m.newAmodeRegRegShift(u32, zero, x.r, x.shift)
}
return m.newAmodeImmReg(u32, x.r)
default: // Both are invalid: use the offset.
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
m.lowerIconst(tmpReg, u64, true)
return m.newAmodeImmReg(0, tmpReg)
}
}
func (m *machine) lowerAddend(x *backend.SSAValueDefinition) addend {
if x.IsFromBlockParam() {
return addend{x.BlkParamVReg, 0, 0}
}
// Ensure the addend is not referenced in multiple places; we will discard nested Iadds.
op := m.c.MatchInstrOneOf(x, addendsMatchOpcodes[:])
if op != ssa.OpcodeInvalid && op != ssa.OpcodeIadd {
return m.lowerAddendFromInstr(x.Instr)
}
p := m.getOperand_Reg(x)
return addend{p.reg(), 0, 0}
}
// lowerAddendFromInstr takes an instruction and returns a VReg and an offset that can be used in an address mode.
// The VReg is regalloc.VRegInvalid if the addend cannot be lowered to a register.
// The offset is 0 if the addend can be lowered to a register.
func (m *machine) lowerAddendFromInstr(instr *ssa.Instruction) addend {
instr.MarkLowered()
switch op := instr.Opcode(); op {
case ssa.OpcodeIconst:
u64 := instr.ConstantVal()
if instr.Return().Type().Bits() == 32 {
return addend{regalloc.VRegInvalid, int64(int32(u64)), 0} // sign-extend.
} else {
return addend{regalloc.VRegInvalid, int64(u64), 0}
}
case ssa.OpcodeUExtend, ssa.OpcodeSExtend:
input := instr.Arg()
inputDef := m.c.ValueDefinition(input)
if input.Type().Bits() != 32 {
panic("BUG: invalid input type " + input.Type().String())
}
constInst := inputDef.IsFromInstr() && inputDef.Instr.Constant()
switch {
case constInst && op == ssa.OpcodeSExtend:
return addend{regalloc.VRegInvalid, int64(uint32(inputDef.Instr.ConstantVal())), 0}
case constInst && op == ssa.OpcodeUExtend:
return addend{regalloc.VRegInvalid, int64(int32(inputDef.Instr.ConstantVal())), 0} // sign-extend!
default:
r := m.getOperand_Reg(inputDef)
return addend{r.reg(), 0, 0}
}
case ssa.OpcodeIshl:
// If the addend is a shift, we can fold it into the address mode only if the shift amount is a constant <= 3.
x, amount := instr.Arg2()
amountDef := m.c.ValueDefinition(amount)
if amountDef.IsFromInstr() && amountDef.Instr.Constant() && amountDef.Instr.ConstantVal() <= 3 {
r := m.getOperand_Reg(m.c.ValueDefinition(x))
return addend{r.reg(), 0, uint8(amountDef.Instr.ConstantVal())}
}
r := m.getOperand_Reg(m.c.ValueDefinition(x))
return addend{r.reg(), 0, 0}
}
panic("BUG: invalid opcode")
}
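
An x64 address mode can encode disp + base + (index << shift) with a shift of at most 3, which is why the Ishl case above only folds a constant shift amount of 3 or less. A sketch computing such an effective address with hypothetical values:

package main

import "fmt"

// effectiveAddress models the amode "disp(base, index, 1<<shift)" that the
// addend folding above is trying to build.
func effectiveAddress(disp uint32, base, index uint64, shift uint8) uint64 {
    if shift > 3 {
        panic("shift amounts above 3 cannot be encoded in an x64 amode")
    }
    return uint64(disp) + base + index<<shift
}

func main() {
    // e.g. base pointer 0x1000, index 5 scaled by 8, displacement 16.
    fmt.Printf("%#x\n", effectiveAddress(16, 0x1000, 5, 3)) // 0x1038
}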

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,304 @@
package amd64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
)
// PostRegAlloc implements backend.Machine.
func (m *machine) PostRegAlloc() {
m.setupPrologue()
m.postRegAlloc()
}
func (m *machine) setupPrologue() {
cur := m.ectx.RootInstr
prevInitInst := cur.next
// At this point, we have the stack layout as follows:
//
// (high address)
// +-----------------+ <----- RBP (somewhere in the middle of the stack)
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | Return Addr |
// RSP ----> +-----------------+
// (low address)
// First, we push the RBP, and update the RBP to the current RSP.
//
// (high address) (high address)
// RBP ----> +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | ====> | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | Return Addr | | Return Addr |
// RSP ----> +-----------------+ | Caller_RBP |
// (low address) +-----------------+ <----- RSP, RBP
//
cur = m.setupRBPRSP(cur)
if !m.stackBoundsCheckDisabled {
cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur)
}
//
// (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | xxxxx | | xxxxx |
// | Return Addr | | Return Addr |
// | Caller_RBP | ====> | Caller_RBP |
// RBP,RSP->+-----------------+ +-----------------+ <----- RBP
// (low address) | clobbered M |
// | clobbered 1 |
// | ........... |
// | clobbered 0 |
// +-----------------+ <----- RSP
//
if regs := m.clobberedRegs; len(regs) > 0 {
for i := range regs {
r := regs[len(regs)-1-i] // Reverse order.
if r.RegType() == regalloc.RegTypeInt {
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r)))
} else {
// Pushing an XMM register is not supported by the PUSH instruction.
cur = m.addRSP(-16, cur)
push := m.allocateInstr().asXmmMovRM(
sseOpcodeMovdqu, r, newOperandMem(m.newAmodeImmReg(0, rspVReg)),
)
cur = linkInstr(cur, push)
}
}
}
if size := m.spillSlotSize; size > 0 {
// Simply decrease the RSP to allocate the spill slots.
// sub $size, %rsp
cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(size)), rspVReg, true))
// At this point, we have the stack layout as follows:
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <--- RBP
// | clobbered M |
// | ............ |
// | clobbered 1 |
// | clobbered 0 |
// | spill slot N |
// | ............ |
// | spill slot 0 |
// +-----------------+ <--- RSP
// (low address)
}
linkInstr(cur, prevInitInst)
}
// postRegAlloc does multiple things while walking through the instructions:
// 1. Inserts the epilogue code.
// 2. Removes the redundant copy instruction.
// 3. Inserts the dec/inc RSP instruction right before/after the call instruction.
// 4. Lowering that is supposed to be done after regalloc.
func (m *machine) postRegAlloc() {
ectx := m.ectx
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
switch k := cur.kind; k {
case ret:
m.setupEpilogueAfter(cur.prev)
continue
case fcvtToSintSequence, fcvtToUintSequence:
m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0]
if k == fcvtToSintSequence {
m.lowerFcvtToSintSequenceAfterRegalloc(cur)
} else {
m.lowerFcvtToUintSequenceAfterRegalloc(cur)
}
prev := cur.prev
next := cur.next
cur := prev
for _, instr := range m.ectx.PendingInstructions {
cur = linkInstr(cur, instr)
}
linkInstr(cur, next)
continue
case xmmCMov:
m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0]
m.lowerXmmCmovAfterRegAlloc(cur)
prev := cur.prev
next := cur.next
cur := prev
for _, instr := range m.ectx.PendingInstructions {
cur = linkInstr(cur, instr)
}
linkInstr(cur, next)
continue
case idivRemSequence:
m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0]
m.lowerIDivRemSequenceAfterRegAlloc(cur)
prev := cur.prev
next := cur.next
cur := prev
for _, instr := range m.ectx.PendingInstructions {
cur = linkInstr(cur, instr)
}
linkInstr(cur, next)
continue
case call, callIndirect:
// At this point, reg alloc is done, therefore we can safely insert the dec/inc RSP instructions
// right before/after the call instruction. If this were done before reg alloc, a stack slot
// could point to the wrong location and therefore result in a wrong value.
call := cur
next := call.next
_, _, _, _, size := backend.ABIInfoFromUint64(call.u2)
if size > 0 {
dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true)
linkInstr(call.prev, dec)
linkInstr(dec, call)
inc := m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(size), rspVReg, true)
linkInstr(call, inc)
linkInstr(inc, next)
}
continue
}
// Removes the redundant copy instruction.
if cur.IsCopy() && cur.op1.reg().RealReg() == cur.op2.reg().RealReg() {
prev, next := cur.prev, cur.next
// Remove the copy instruction.
prev.next = next
if next != nil {
next.prev = prev
}
}
}
}
func (m *machine) setupEpilogueAfter(cur *instruction) {
prevNext := cur.next
// At this point, we have the stack layout as follows:
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <--- RBP
// | clobbered M |
// | ............ |
// | clobbered 1 |
// | clobbered 0 |
// | spill slot N |
// | ............ |
// | spill slot 0 |
// +-----------------+ <--- RSP
// (low address)
if size := m.spillSlotSize; size > 0 {
// Simply increase the RSP to free the spill slots.
// add $size, %rsp
cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(uint32(size)), rspVReg, true))
}
//
// (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | ReturnAddress | | ReturnAddress |
// | Caller_RBP | | Caller_RBP |
// RBP ---> +-----------------+ ========> +-----------------+ <---- RSP, RBP
// | clobbered M |
// | ............ |
// | clobbered 1 |
// | clobbered 0 |
// RSP ---> +-----------------+
// (low address)
//
if regs := m.clobberedRegs; len(regs) > 0 {
for _, r := range regs {
if r.RegType() == regalloc.RegTypeInt {
cur = linkInstr(cur, m.allocateInstr().asPop64(r))
} else {
// Popping an XMM register is not supported by the POP instruction, so load it manually and then free the stack space.
pop := m.allocateInstr().asXmmUnaryRmR(
sseOpcodeMovdqu, newOperandMem(m.newAmodeImmReg(0, rspVReg)), r,
)
cur = linkInstr(cur, pop)
cur = m.addRSP(16, cur)
}
}
}
// Now roll back the RSP to RBP, and pop the caller's RBP.
cur = m.revertRBPRSP(cur)
linkInstr(cur, prevNext)
}
func (m *machine) addRSP(offset int32, cur *instruction) *instruction {
if offset == 0 {
return cur
}
opcode := aluRmiROpcodeAdd
if offset < 0 {
opcode = aluRmiROpcodeSub
offset = -offset
}
return linkInstr(cur, m.allocateInstr().asAluRmiR(opcode, newOperandImm32(uint32(offset)), rspVReg, true))
}
func (m *machine) setupRBPRSP(cur *instruction) *instruction {
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(rbpVReg)))
cur = linkInstr(cur, m.allocateInstr().asMovRR(rspVReg, rbpVReg, true))
return cur
}
func (m *machine) revertRBPRSP(cur *instruction) *instruction {
cur = linkInstr(cur, m.allocateInstr().asMovRR(rbpVReg, rspVReg, true))
cur = linkInstr(cur, m.allocateInstr().asPop64(rbpVReg))
return cur
}

View File

@@ -0,0 +1,153 @@
package amd64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// InsertMoveBefore implements backend.RegAllocFunctionMachine.
func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) {
typ := src.RegType()
if typ != dst.RegType() {
panic("BUG: src and dst must have the same type")
}
mov := m.allocateInstr()
if typ == regalloc.RegTypeInt {
mov.asMovRR(src, dst, true)
} else {
mov.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandReg(src), dst)
}
cur := instr.prev
prevNext := cur.next
cur = linkInstr(cur, mov)
linkInstr(cur, prevNext)
}
// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine.
func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
if !v.IsRealReg() {
panic("BUG: VReg must be backed by real reg to be stored")
}
typ := m.c.TypeOf(v)
var prevNext, cur *instruction
if after {
cur, prevNext = instr, instr.next
} else {
cur, prevNext = instr.prev, instr
}
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
store := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg))
switch typ {
case ssa.TypeI32:
store.asMovRM(v, mem, 4)
case ssa.TypeI64:
store.asMovRM(v, mem, 8)
case ssa.TypeF32:
store.asXmmMovRM(sseOpcodeMovss, v, mem)
case ssa.TypeF64:
store.asXmmMovRM(sseOpcodeMovsd, v, mem)
case ssa.TypeV128:
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
}
cur = linkInstr(cur, store)
return linkInstr(cur, prevNext)
}
// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine.
func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
if !v.IsRealReg() {
panic("BUG: VReg must be backed by real reg to be stored")
}
typ := m.c.TypeOf(v)
var prevNext, cur *instruction
if after {
cur, prevNext = instr, instr.next
} else {
cur, prevNext = instr.prev, instr
}
// Load the value to the temporary.
load := m.allocateInstr()
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
a := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg))
switch typ {
case ssa.TypeI32:
load.asMovzxRmR(extModeLQ, a, v)
case ssa.TypeI64:
load.asMov64MR(a, v)
case ssa.TypeF32:
load.asXmmUnaryRmR(sseOpcodeMovss, a, v)
case ssa.TypeF64:
load.asXmmUnaryRmR(sseOpcodeMovsd, a, v)
case ssa.TypeV128:
load.asXmmUnaryRmR(sseOpcodeMovdqu, a, v)
default:
panic("BUG")
}
cur = linkInstr(cur, load)
return linkInstr(cur, prevNext)
}
// ClobberedRegisters implements backend.RegAllocFunctionMachine.
func (m *machine) ClobberedRegisters(regs []regalloc.VReg) {
m.clobberedRegs = append(m.clobberedRegs[:0], regs...)
}
// Swap implements backend.RegAllocFunctionMachine.
func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {
if x1.RegType() == regalloc.RegTypeInt {
prevNext := cur.next
xc := m.allocateInstr().asXCHG(x1, newOperandReg(x2), 8)
cur = linkInstr(cur, xc)
linkInstr(cur, prevNext)
} else {
if tmp.Valid() {
prevNext := cur.next
m.InsertMoveBefore(tmp, x1, prevNext)
m.InsertMoveBefore(x1, x2, prevNext)
m.InsertMoveBefore(x2, tmp, prevNext)
} else {
prevNext := cur.next
r2 := x2.RealReg()
// Temporarily spill x1 to stack.
cur = m.InsertStoreRegisterAt(x1, cur, true).prev
// Then move x2 to x1.
cur = linkInstr(cur, m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqa, newOperandReg(x2), x1))
linkInstr(cur, prevNext)
// Then reload the original value on x1 from stack to r2.
m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true)
}
}
}
// LastInstrForInsertion implements backend.RegAllocFunctionMachine.
func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction {
cur := end
for cur.kind == nop0 {
cur = cur.prev
if cur == begin {
return end
}
}
switch cur.kind {
case jmp:
return cur
default:
return end
}
}
// SSABlockLabel implements backend.RegAllocFunctionMachine.
func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label {
return m.ectx.SsaBlockIDToLabels[id]
}

View File

@@ -0,0 +1,992 @@
package amd64
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
var swizzleMask = [16]byte{
0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
}
func (m *machine) lowerSwizzle(x, y ssa.Value, ret ssa.Value) {
masklabel := m.getOrAllocateConstLabel(&m.constSwizzleMaskConstIndex, swizzleMask[:])
// Load mask to maskReg.
maskReg := m.c.AllocateVReg(ssa.TypeV128)
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(masklabel)), maskReg)
m.insert(loadMask)
// Copy x and y to tmp registers.
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
tmpDst := m.copyToTmp(xx.reg())
yy := m.getOperand_Reg(m.c.ValueDefinition(y))
tmpX := m.copyToTmp(yy.reg())
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddusb, newOperandReg(maskReg), tmpX))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpX), tmpDst))
// Copy the result to the destination register.
m.copyTo(tmpDst, m.c.VRegOf(ret))
}
func (m *machine) lowerInsertLane(x, y ssa.Value, index byte, ret ssa.Value, lane ssa.VecLane) {
// Copy x to tmp.
tmpDst := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, m.getOperand_Mem_Reg(m.c.ValueDefinition(x)), tmpDst))
yy := m.getOperand_Reg(m.c.ValueDefinition(y))
switch lane {
case ssa.VecLaneI8x16:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, index, yy, tmpDst))
case ssa.VecLaneI16x8:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, index, yy, tmpDst))
case ssa.VecLaneI32x4:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, index, yy, tmpDst))
case ssa.VecLaneI64x2:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, index, yy, tmpDst))
case ssa.VecLaneF32x4:
// In the INSERTPS instruction, the destination index is encoded in bits 4 and 5 of the immediate.
// See https://www.felixcloutier.com/x86/insertps
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeInsertps, index<<4, yy, tmpDst))
case ssa.VecLaneF64x2:
if index == 0 {
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, yy, tmpDst))
} else {
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMovlhps, yy, tmpDst))
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
m.copyTo(tmpDst, m.c.VRegOf(ret))
}
func (m *machine) lowerExtractLane(x ssa.Value, index byte, signed bool, ret ssa.Value, lane ssa.VecLane) {
// Pextr variants are used to extract a lane from a vector register.
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
tmpDst := m.c.AllocateVReg(ret.Type())
m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst))
switch lane {
case ssa.VecLaneI8x16:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrb, index, xx, tmpDst))
if signed {
m.insert(m.allocateInstr().asMovsxRmR(extModeBL, newOperandReg(tmpDst), tmpDst))
} else {
m.insert(m.allocateInstr().asMovzxRmR(extModeBL, newOperandReg(tmpDst), tmpDst))
}
case ssa.VecLaneI16x8:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrw, index, xx, tmpDst))
if signed {
m.insert(m.allocateInstr().asMovsxRmR(extModeWL, newOperandReg(tmpDst), tmpDst))
} else {
m.insert(m.allocateInstr().asMovzxRmR(extModeWL, newOperandReg(tmpDst), tmpDst))
}
case ssa.VecLaneI32x4:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrd, index, xx, tmpDst))
case ssa.VecLaneI64x2:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, index, xx, tmpDst))
case ssa.VecLaneF32x4:
if index == 0 {
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovss, xx, tmpDst))
} else {
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, index, xx, tmpDst))
}
case ssa.VecLaneF64x2:
if index == 0 {
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, xx, tmpDst))
} else {
m.copyTo(xx.reg(), tmpDst)
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0b00_00_11_10, newOperandReg(tmpDst), tmpDst))
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
m.copyTo(tmpDst, m.c.VRegOf(ret))
}
var sqmulRoundSat = [16]byte{
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
}
func (m *machine) lowerSqmulRoundSat(x, y, ret ssa.Value) {
// See https://github.com/WebAssembly/simd/pull/365 for the following logic.
maskLabel := m.getOrAllocateConstLabel(&m.constSqmulRoundSatIndex, sqmulRoundSat[:])
tmp := m.c.AllocateVReg(ssa.TypeV128)
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp)
m.insert(loadMask)
xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
tmpX := m.copyToTmp(xx.reg())
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmulhrsw, yy, tmpX))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmpX), tmp))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmpX))
m.copyTo(tmpX, m.c.VRegOf(ret))
}
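// lowerSqmulRoundSat above implements i16x8.q15mulr_sat_s: PMULHRSW computes (x*y + 0x4000) >> 15
// per lane, and the following compare-against-0x8000 plus XOR patches the single overflow case
// (-32768 * -32768), which would otherwise come out as -32768 instead of the saturated +32767.
// A minimal scalar sketch of the same semantics (exampleQ15MulRSat is illustrative only):
func exampleQ15MulRSat(x, y int16) int16 {
    r := (int32(x)*int32(y) + 0x4000) >> 15
    if r > 0x7fff { // only reachable when x == y == -32768
        r = 0x7fff
    }
    return int16(r)
}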
func (m *machine) lowerVUshr(x, y, ret ssa.Value, lane ssa.VecLane) {
switch lane {
case ssa.VecLaneI8x16:
m.lowerVUshri8x16(x, y, ret)
case ssa.VecLaneI16x8, ssa.VecLaneI32x4, ssa.VecLaneI64x2:
m.lowerShr(x, y, ret, lane, false)
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
}
// i8x16LogicalSHRMaskTable is necessary for emulating non-existent packed bytes logical right shifts on amd64.
// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits.
var i8x16LogicalSHRMaskTable = [8 * 16]byte{ // (the number of possible shift amount 0, 1, ..., 7.) * 16 bytes.
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // for 1 shift
0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, // for 2 shift
0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, // for 3 shift
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, // for 4 shift
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // for 5 shift
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, // for 6 shift
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // for 7 shift
}
func (m *machine) lowerVUshri8x16(x, y, ret ssa.Value) {
tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
// Load the modulo 8 mask to tmpReg.
m.lowerIconst(tmpGpReg, 0x7, false)
// Take the modulo 8 of the shift amount.
shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y))
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, tmpGpReg, false))
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
vecTmp := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), vecTmp, false))
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrlw, newOperandReg(vecTmp), xx))
maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16LogicalSHRMaskTableIndex, i8x16LogicalSHRMaskTable[:])
base := m.c.AllocateVReg(ssa.TypeI64)
lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base)
m.insert(lea)
// Shift tmpGpReg by 4 to multiply the shift amount by 16.
m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false))
mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0)
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), vecTmp)
m.insert(loadMask)
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(vecTmp), xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
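// Since x86 has no packed-byte shift, the function above shifts 16-bit words with PSRLW and then
// masks with the 0xff>>s entry from i8x16LogicalSHRMaskTable to clear the bits that leaked in from
// the neighbouring byte. A scalar sketch of why that works for a pair of bytes packed into one
// word (illustrative only; s is assumed to be in 0..7, as guaranteed by the modulo-8 step above):
func exampleByteLogicalShr(hi, lo byte, s uint) (byte, byte) {
    word := uint16(hi)<<8 | uint16(lo)
    shifted := word >> s    // PSRLW: low bits of hi leak into the top of lo...
    mask := byte(0xff) >> s // ...and the table entry 0xff>>s clears exactly those bits.
    return byte(shifted>>8) & mask, byte(shifted) & mask // == hi>>s, lo>>s
}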
func (m *machine) lowerVSshr(x, y, ret ssa.Value, lane ssa.VecLane) {
switch lane {
case ssa.VecLaneI8x16:
m.lowerVSshri8x16(x, y, ret)
case ssa.VecLaneI16x8, ssa.VecLaneI32x4:
m.lowerShr(x, y, ret, lane, true)
case ssa.VecLaneI64x2:
m.lowerVSshri64x2(x, y, ret)
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
}
func (m *machine) lowerVSshri8x16(x, y, ret ssa.Value) {
shiftAmtReg := m.c.AllocateVReg(ssa.TypeI32)
// Load the modulo 8 mask to tmpReg.
m.lowerIconst(shiftAmtReg, 0x7, false)
// Take the modulo 8 of the shift amount.
shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y))
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, shiftAmtReg, false))
// Copy the x value to two temporary registers.
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
vecTmp := m.c.AllocateVReg(ssa.TypeV128)
m.copyTo(xx, vecTmp)
// Assuming that we have
// xx = [b1, ..., b16]
// vecTmp = [b1, ..., b16]
// at this point, then we use PUNPCKLBW and PUNPCKHBW to produce:
// xx = [b1, b1, b2, b2, ..., b8, b8]
// vecTmp = [b9, b9, b10, b10, ..., b16, b16]
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpcklbw, newOperandReg(xx), xx))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpckhbw, newOperandReg(vecTmp), vecTmp))
// Adding 8 to the shift amount, and then move the amount to vecTmp2.
vecTmp2 := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(8), shiftAmtReg, false))
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(shiftAmtReg), vecTmp2, false))
// Perform the word packed arithmetic right shifts on vreg and vecTmp.
// This changes these two registers as:
// xx = [xxx, b1 >> s, xxx, b2 >> s, ..., xxx, b8 >> s]
// vecTmp = [xxx, b9 >> s, xxx, b10 >> s, ..., xxx, b16 >> s]
// where xxx is 1 or 0 depending on each byte's sign, and ">>" is the arithmetic shift on a byte.
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), xx))
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), vecTmp))
// Finally, we can get the result by packing these two word vectors.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePacksswb, newOperandReg(vecTmp), xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
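// The signed-byte shift above has no direct instruction either: PUNPCKLBW/PUNPCKHBW duplicate each
// byte into a 16-bit word, PSRAW shifts by amount+8, and PACKSSWB narrows back (the saturation is a
// no-op because the result already fits in int8). A scalar sketch of the same trick on one byte
// (illustrative only; s is assumed to be in 0..7):
func exampleByteArithShr(b int8, s uint) int8 {
    widened := int16(b)<<8 | int16(uint8(b)) // [b, b] as one word, like PUNPCKLBW with itself
    return int8(widened >> (s + 8))          // PSRAW by s+8, then take the low byte: b >> s arithmetically
}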
func (m *machine) lowerVSshri64x2(x, y, ret ssa.Value) {
// Load the shift amount to RCX.
shiftAmt := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, shiftAmt, rcxVReg))
tmpGp := m.c.AllocateVReg(ssa.TypeI64)
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xxReg := m.copyToTmp(_xx.reg())
m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 0, newOperandReg(xxReg), tmpGp))
m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), xxReg))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 1, newOperandReg(xxReg), tmpGp))
m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), xxReg))
m.copyTo(xxReg, m.c.VRegOf(ret))
}
func (m *machine) lowerShr(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) {
var modulo uint64
var shiftOp sseOpcode
switch lane {
case ssa.VecLaneI16x8:
modulo = 0xf
if signed {
shiftOp = sseOpcodePsraw
} else {
shiftOp = sseOpcodePsrlw
}
case ssa.VecLaneI32x4:
modulo = 0x1f
if signed {
shiftOp = sseOpcodePsrad
} else {
shiftOp = sseOpcodePsrld
}
case ssa.VecLaneI64x2:
modulo = 0x3f
if signed {
panic("BUG")
}
shiftOp = sseOpcodePsrlq
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
// Load the modulo mask (the lane bit-width minus one) to tmpGpReg.
m.lowerIconst(tmpGpReg, modulo, false)
// Take the shift amount modulo the lane width in bits.
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd,
m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false))
// And move it to a xmm register.
tmpVec := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false))
// Then do the actual shift.
m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
func (m *machine) lowerVIshl(x, y, ret ssa.Value, lane ssa.VecLane) {
var modulo uint64
var shiftOp sseOpcode
var isI8x16 bool
switch lane {
case ssa.VecLaneI8x16:
isI8x16 = true
modulo = 0x7
shiftOp = sseOpcodePsllw
case ssa.VecLaneI16x8:
modulo = 0xf
shiftOp = sseOpcodePsllw
case ssa.VecLaneI32x4:
modulo = 0x1f
shiftOp = sseOpcodePslld
case ssa.VecLaneI64x2:
modulo = 0x3f
shiftOp = sseOpcodePsllq
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
// Load the modulo mask (the lane bit-width minus one) to tmpGpReg.
m.lowerIconst(tmpGpReg, modulo, false)
// Take the shift amount modulo the lane width in bits.
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd,
m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false))
// And move it to a xmm register.
tmpVec := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false))
// Then do the actual shift.
m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx))
if isI8x16 {
maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16SHLMaskTableIndex, i8x16SHLMaskTable[:])
base := m.c.AllocateVReg(ssa.TypeI64)
lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base)
m.insert(lea)
// Shift tmpGpReg by 4 to multiply the shift amount by 16.
m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false))
mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0)
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), tmpVec)
m.insert(loadMask)
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(tmpVec), xx))
}
m.copyTo(xx, m.c.VRegOf(ret))
}
// i8x16SHLMaskTable is necessary for emulating non-existent packed bytes left shifts on amd64.
// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits.
var i8x16SHLMaskTable = [8 * 16]byte{ // (the number of possible shift amount 0, 1, ..., 7.) * 16 bytes.
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift
0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, // for 1 shift
0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, // for 2 shift
0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, // for 3 shift
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // for 4 shift
0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, // for 5 shift
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, // for 6 shift
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, // for 7 shift
}
func (m *machine) lowerVRound(x, ret ssa.Value, imm byte, _64 bool) {
xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
var round sseOpcode
if _64 {
round = sseOpcodeRoundpd
} else {
round = sseOpcodeRoundps
}
m.insert(m.allocateInstr().asXmmUnaryRmRImm(round, imm, xx, m.c.VRegOf(ret)))
}
var (
allOnesI8x16 = [16]byte{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}
allOnesI16x8 = [16]byte{0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0}
extAddPairwiseI16x8uMask1 = [16]byte{0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80}
extAddPairwiseI16x8uMask2 = [16]byte{0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00}
)
func (m *machine) lowerExtIaddPairwise(x, ret ssa.Value, srcLane ssa.VecLane, signed bool) {
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
switch srcLane {
case ssa.VecLaneI8x16:
allOneReg := m.c.AllocateVReg(ssa.TypeV128)
mask := m.getOrAllocateConstLabel(&m.constAllOnesI8x16Index, allOnesI8x16[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOneReg))
var resultReg regalloc.VReg
if signed {
resultReg = allOneReg
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(xx), resultReg))
} else {
// Interpret tmp (all ones) as the signed-byte operand, meaning that xx is treated as unsigned in the multiply-add.
resultReg = xx
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(allOneReg), resultReg))
}
m.copyTo(resultReg, m.c.VRegOf(ret))
case ssa.VecLaneI16x8:
if signed {
allOnesReg := m.c.AllocateVReg(ssa.TypeV128)
mask := m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOnesReg))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(allOnesReg), xx))
m.copyTo(xx, m.c.VRegOf(ret))
} else {
maskReg := m.c.AllocateVReg(ssa.TypeV128)
mask := m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask1Index, extAddPairwiseI16x8uMask1[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
// Flip the sign bits on xx.
//
// Assuming that xx = [w1, ..., w8] of unsigned 16-bit words, we now have
//    xx[i] = int16(wi ^ 0x8000), i.e. wi - 0x8000, for i = 1...8
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(maskReg), xx))
mask = m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
// For i = 1,...,4 (as this results in i32x4 lanes), we now have
//    xx[i] = int32((w(2i-1) - 0x8000) + (w(2i) - 0x8000)) = int32(w(2i-1) + w(2i)) - 0x10000
// c.assembler.CompileRegisterToRegister(amd64.PMADDWD, tmp, vr)
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(maskReg), xx))
mask = m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask2Index, extAddPairwiseI16x8uMask2[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
// xx[i] = (int32(w(2i-1) + w(2i)) - 0x10000) + 0x10000 = int32(w(2i-1) + w(2i)) = uint32(w(2i-1) + w(2i)).
// c.assembler.CompileRegisterToRegister(amd64.PADDD, tmp, vr)
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(maskReg), xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
default:
panic(fmt.Sprintf("invalid lane type: %s", srcLane))
}
}
func (m *machine) lowerWidenLow(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
var sseOp sseOpcode
switch lane {
case ssa.VecLaneI8x16:
if signed {
sseOp = sseOpcodePmovsxbw
} else {
sseOp = sseOpcodePmovzxbw
}
case ssa.VecLaneI16x8:
if signed {
sseOp = sseOpcodePmovsxwd
} else {
sseOp = sseOpcodePmovzxwd
}
case ssa.VecLaneI32x4:
if signed {
sseOp = sseOpcodePmovsxdq
} else {
sseOp = sseOpcodePmovzxdq
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, xx, m.c.VRegOf(ret)))
}
func (m *machine) lowerWidenHigh(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
tmp := m.c.AllocateVReg(ssa.TypeV128)
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
m.copyTo(xx.reg(), tmp)
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePalignr, 8, newOperandReg(tmp), tmp))
var sseOp sseOpcode
switch lane {
case ssa.VecLaneI8x16:
if signed {
sseOp = sseOpcodePmovsxbw
} else {
sseOp = sseOpcodePmovzxbw
}
case ssa.VecLaneI16x8:
if signed {
sseOp = sseOpcodePmovsxwd
} else {
sseOp = sseOpcodePmovzxwd
}
case ssa.VecLaneI32x4:
if signed {
sseOp = sseOpcodePmovsxdq
} else {
sseOp = sseOpcodePmovzxdq
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, newOperandReg(tmp), m.c.VRegOf(ret)))
}
func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, ret ssa.Value, lane ssa.VecLane) {
tmpDst, tmpGp := m.c.AllocateVReg(ssa.TypeV128), m.c.AllocateVReg(ssa.TypeI64)
am := newOperandMem(m.lowerToAddressMode(ptr, offset))
m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst))
switch lane {
case ssa.VecLaneI8x16:
m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, am, tmpGp))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, 0, newOperandReg(tmpGp), tmpDst))
tmpZeroVec := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asZeros(tmpZeroVec))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpZeroVec), tmpDst))
case ssa.VecLaneI16x8:
m.insert(m.allocateInstr().asMovzxRmR(extModeWQ, am, tmpGp))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 0, newOperandReg(tmpGp), tmpDst))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 1, newOperandReg(tmpGp), tmpDst))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst))
case ssa.VecLaneI32x4:
m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, am, tmpGp))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, 0, newOperandReg(tmpGp), tmpDst))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst))
case ssa.VecLaneI64x2:
m.insert(m.allocateInstr().asMov64MR(am, tmpGp))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), tmpDst))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), tmpDst))
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
m.copyTo(tmpDst, m.c.VRegOf(ret))
}
var f64x2CvtFromIMask = [16]byte{
0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}
func (m *machine) lowerVFcvtFromInt(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
switch lane {
case ssa.VecLaneF32x4:
if signed {
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, xx, m.c.VRegOf(ret)))
} else {
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
// Copy the value to two temporary registers.
tmp := m.copyToTmp(xx.reg())
tmp2 := m.copyToTmp(xx.reg())
// Clear the higher 16 bits of each 32-bit element.
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePslld, newOperandImm32(0xa), tmp))
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0xa), tmp))
// Subtract tmp (the lower 16-bits) from tmp2: this clears the lower 16-bits of tmp2.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubd, newOperandReg(tmp), tmp2))
// Convert the lower 16-bits in tmp.
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp))
// Logically right-shift tmp2 by one and convert it, so that tmp2 holds half of the conversion result of the higher 16-bits.
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(1), tmp2))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp2), tmp2))
// Double it to recover the conversion result of the higher 16-bits.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp2), tmp2))
// Get the final result by adding tmp (holding the conversion of the lower 16-bits) to tmp2.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp), tmp2))
m.copyTo(tmp2, m.c.VRegOf(ret))
}
case ssa.VecLaneF64x2:
if signed {
xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2pd, xx, m.c.VRegOf(ret)))
} else {
maskReg := m.c.AllocateVReg(ssa.TypeV128)
maskLabel := m.getOrAllocateConstLabel(&m.constF64x2CvtFromIMaskIndex, f64x2CvtFromIMask[:])
// maskReg = [0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
// Given that we have xx = [d1, d2, d3, d4], this results in
// xx = [d1, [0x00, 0x00, 0x30, 0x43], d2, [0x00, 0x00, 0x30, 0x43]]
// = [float64(uint32(d1)) + 0x1.0p52, float64(uint32(d2)) + 0x1.0p52]
// ^See https://stackoverflow.com/questions/13269523/can-all-32-bit-ints-be-exactly-represented-as-a-double
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeUnpcklps, newOperandReg(maskReg), xx))
// maskReg = [float64(0x1.0p52), float64(0x1.0p52)]
maskLabel = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))
// Now, we get the result as
// xx = [float64(uint32(d1)), float64(uint32(d2))]
// because the following equality always holds:
//    (0x1.0p52 + float64(uint32(d))) - 0x1.0p52 = float64(uint32(d))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubpd, newOperandReg(maskReg), xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
}
var (
// i32sMaxOnF64x2 holds math.MaxInt32(=2147483647.0) on two f64 lanes.
i32sMaxOnF64x2 = [16]byte{
0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0)
0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0)
}
// i32uMaxOnF64x2 holds math.MaxUint32(=4294967295.0) on two f64 lanes.
i32uMaxOnF64x2 = [16]byte{
0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0)
0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0)
}
// twop52 holds two float64(0x1.0p52) values on two f64 lanes. 0x1.0p52 is special in the sense that,
// with this exponent, the low mantissa bits hold a uint32 value exactly, and additions or
// subtractions between such numbers keep that 32-bit integer bit pattern intact in the mantissa.
//
// Note: the name twop52 is common across various compiler ecosystem.
// E.g. https://github.com/llvm/llvm-project/blob/92ab024f81e5b64e258b7c3baaf213c7c26fcf40/compiler-rt/lib/builtins/floatdidf.c#L28
// E.g. https://opensource.apple.com/source/clang/clang-425.0.24/src/projects/compiler-rt/lib/floatdidf.c.auto.html
twop52 = [16]byte{
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52)
}
)
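// The twop52 trick above is the classic branch-free uint32-to-float64 conversion: ORing a uint32
// into the low mantissa bits of float64(0x1p52) yields exactly 0x1p52 + float64(u), so a single
// subtraction recovers float64(u). A self-contained scalar sketch (illustrative only; it assumes
// the standard math package, which this file does not otherwise import):
func exampleU32ToF64ViaTwop52(u uint32) float64 {
    const twop52bits = 0x4330000000000000 // bit pattern of float64(0x1p52), i.e. the bytes in twop52 above
    f := math.Float64frombits(twop52bits | uint64(u))
    return f - 0x1p52 // exactly float64(u): 2^52+u is representable, so no rounding happens
}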
func (m *machine) lowerVFcvtToIntSat(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
switch lane {
case ssa.VecLaneF32x4:
if signed {
tmp := m.copyToTmp(xx)
// Assuming we have xx = [v1, v2, v3, v4].
//
// Set all bits if lane is not NaN on tmp.
// tmp[i] = 0xffffffff if vi != NaN
// = 0 if vi == NaN
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp))
// Clear NaN lanes on xx, meaning that
// xx[i] = vi if vi != NaN
// 0 if vi == NaN
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp), xx))
// tmp[i] = ^vi if vi != NaN
// = 0xffffffff if vi == NaN
// which means that tmp[i] & 0x80000000 != 0 if and only if vi is negative.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeXorps, newOperandReg(xx), tmp))
// xx[i] = int32(vi) if vi != NaN and xx is not overflowing.
// = 0x80000000 if vi != NaN and xx is overflowing (See https://www.felixcloutier.com/x86/cvttps2dq)
// = 0 if vi == NaN
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx))
// Below, we have to convert 0x80000000 into 0x7FFFFFFF for positive overflowing lane.
//
// tmp[i] = 0x80000000 if vi is positive
// = any value satisfying any&0x80000000 = 0 if vi is negative or zero.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(xx), tmp))
// Arithmetic right shifting tmp by 31, meaning that we have
// tmp[i] = 0xffffffff if vi is positive, 0 otherwise.
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrad, newOperandImm32(0x1f), tmp))
// Flipping 0x80000000 if vi is positive, otherwise keep intact.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), xx))
} else {
tmp := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asZeros(tmp))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxps, newOperandReg(tmp), xx))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmp), tmp))
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0x1), tmp))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp))
tmp2 := m.copyToTmp(xx)
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubps, newOperandReg(tmp), tmp2))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredLE_OS), newOperandReg(tmp2), tmp))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(tmp2), tmp2))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp2))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaxsd, newOperandReg(tmp), tmp2))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(tmp2), xx))
}
case ssa.VecLaneF64x2:
tmp2 := m.c.AllocateVReg(ssa.TypeV128)
if signed {
tmp := m.copyToTmp(xx)
// Set all bits for non-NaN lanes, zeros otherwise.
// I.e. tmp[i] = 0xffffffff_ffffffff if vi != NaN, 0 otherwise.
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmppd, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp))
maskLabel := m.getOrAllocateConstLabel(&m.constI32sMaxOnF64x2Index, i32sMaxOnF64x2[:])
// Load the 2147483647 into tmp2's each lane.
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp2))
// tmp[i] = 2147483647 if vi != NaN, 0 otherwise.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp2), tmp))
// MINPD returns the source register's value as-is, so we have
// xx[i] = vi if vi != NaN
// = 0 if vi == NaN
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp), xx))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttpd2dq, newOperandReg(xx), xx))
} else {
tmp := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asZeros(tmp))
// xx[i] = vi if vi != NaN && vi > 0
// = 0 if vi == NaN || vi <= 0
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxpd, newOperandReg(tmp), xx))
// tmp2[i] = float64(math.MaxUint32) = math.MaxUint32
maskIndex := m.getOrAllocateConstLabel(&m.constI32uMaxOnF64x2Index, i32uMaxOnF64x2[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2))
// xx[i] = vi if vi != NaN && vi > 0 && vi <= math.MaxUint32
// = 0 otherwise
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp2), xx))
// Round the floating points into integer.
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeRoundpd, 0x3, newOperandReg(xx), xx))
// tmp2[i] = float64(0x1.0p52)
maskIndex = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2))
// xx[i] = float64(0x1.0p52) + float64(uint32(vi)) if vi != NaN && vi > 0 && vi <= math.MaxUint32
// = 0 otherwise
//
// This means that xx[i] holds exactly the same bit of uint32(vi) in its lower 32-bits.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddpd, newOperandReg(tmp2), xx))
// At this point, we have
// xx = [uint32(v0), float64(0x1.0p52), uint32(v1), float64(0x1.0p52)]
// tmp = [0, 0, 0, 0]
// as 32x4 lanes. Therefore, SHUFPS with 0b00_00_10_00 results in
// xx = [xx[00], xx[10], tmp[00], tmp[00]] = [xx[00], xx[10], 0, 0]
// meaning that for i = 0 and 1, we have
// xx[i] = uint32(vi) if vi != NaN && vi > 0 && vi <= math.MaxUint32
// = 0 otherwise.
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeShufps, 0b00_00_10_00, newOperandReg(tmp), xx))
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
m.copyTo(xx, m.c.VRegOf(ret))
}
func (m *machine) lowerNarrow(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) {
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
var sseOp sseOpcode
switch lane {
case ssa.VecLaneI16x8:
if signed {
sseOp = sseOpcodePacksswb
} else {
sseOp = sseOpcodePackuswb
}
case ssa.VecLaneI32x4:
if signed {
sseOp = sseOpcodePackssdw
} else {
sseOp = sseOpcodePackusdw
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
m.insert(m.allocateInstr().asXmmRmR(sseOp, yy, xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
func (m *machine) lowerWideningPairwiseDotProductS(x, y, ret ssa.Value) {
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, yy, xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
func (m *machine) lowerVIabs(instr *ssa.Instruction) {
x, lane := instr.ArgWithLane()
rd := m.c.VRegOf(instr.Return())
if lane == ssa.VecLaneI64x2 {
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
blendReg := xmm0VReg
m.insert(m.allocateInstr().asDefineUninitializedReg(blendReg))
tmp := m.copyToTmp(_xx.reg())
xx := m.copyToTmp(_xx.reg())
// Clear all bits on blendReg.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(blendReg), blendReg))
// Subtract xx from blendMaskReg.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubq, newOperandReg(xx), blendReg))
// Copy the subtracted value ^^ back into tmp.
m.copyTo(blendReg, xx)
m.insert(m.allocateInstr().asBlendvpd(newOperandReg(tmp), xx))
m.copyTo(xx, rd)
} else {
var vecOp sseOpcode
switch lane {
case ssa.VecLaneI8x16:
vecOp = sseOpcodePabsb
case ssa.VecLaneI16x8:
vecOp = sseOpcodePabsw
case ssa.VecLaneI32x4:
vecOp = sseOpcodePabsd
}
rn := m.getOperand_Reg(m.c.ValueDefinition(x))
i := m.allocateInstr()
i.asXmmUnaryRmR(vecOp, rn, rd)
m.insert(i)
}
}
func (m *machine) lowerVIpopcnt(instr *ssa.Instruction) {
x := instr.Arg()
rn := m.getOperand_Reg(m.c.ValueDefinition(x))
rd := m.c.VRegOf(instr.Return())
tmp1 := m.c.AllocateVReg(ssa.TypeV128)
m.lowerVconst(tmp1, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f)
// Copy input into tmp2.
tmp2 := m.copyToTmp(rn.reg())
// Given that we have:
// rm = [b1, ..., b16] where bn = hn:ln and hn and ln are higher and lower 4-bits of bn.
//
// Take PAND on tmp1 and tmp2, so that we mask out all the higher bits.
// tmp2 = [l1, ..., l16].
pand := m.allocateInstr()
pand.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp2)
m.insert(pand)
// Do logical (packed word) right shift by 4 on rm and PAND against the mask (tmp1); meaning that we have
// tmp3 = [h1, ...., h16].
tmp3 := m.copyToTmp(rn.reg())
psrlw := m.allocateInstr()
psrlw.asXmmRmiReg(sseOpcodePsrlw, newOperandImm32(4), tmp3)
m.insert(psrlw)
pand2 := m.allocateInstr()
pand2.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp3)
m.insert(pand2)
// Read the popcntTable into tmp4, and we have
// tmp4 = [0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04]
tmp4 := m.c.AllocateVReg(ssa.TypeV128)
m.lowerVconst(tmp4, 0x03_02_02_01_02_01_01_00, 0x04_03_03_02_03_02_02_01)
// Make a copy for later.
tmp5 := m.copyToTmp(tmp4)
// tmp4 = [popcnt(l1), ..., popcnt(l16)].
pshufb := m.allocateInstr()
pshufb.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp2), tmp4)
m.insert(pshufb)
pshufb2 := m.allocateInstr()
pshufb2.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp3), tmp5)
m.insert(pshufb2)
// tmp4 + tmp5 is the result.
paddb := m.allocateInstr()
paddb.asXmmRmR(sseOpcodePaddb, newOperandReg(tmp4), tmp5)
m.insert(paddb)
m.copyTo(tmp5, rd)
}
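// The popcount above is the nibble-table trick: split every byte into its low and high 4 bits,
// look each half up in the 16-entry table loaded into tmp4/tmp5 via PSHUFB, and add the two
// results. A scalar sketch of the same computation (illustrative only; the table below is the
// same constant as the one loaded with lowerVconst above):
func exampleNibblePopcnt(b byte) int {
    table := [16]byte{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}
    return int(table[b&0x0f]) + int(table[b>>4]) // equals bits.OnesCount8(b)
}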
func (m *machine) lowerVImul(instr *ssa.Instruction) {
x, y, lane := instr.Arg2WithLane()
rd := m.c.VRegOf(instr.Return())
if lane == ssa.VecLaneI64x2 {
rn := m.getOperand_Reg(m.c.ValueDefinition(x))
rm := m.getOperand_Reg(m.c.ValueDefinition(y))
// Assuming that we have
// rm = [p1, p2] = [p1_lo, p1_hi, p2_lo, p2_high]
// rn = [q1, q2] = [q1_lo, q1_hi, q2_lo, q2_high]
// where pN and qN are 64-bit (quad word) lane, and pN_lo, pN_hi, qN_lo and qN_hi are 32-bit (double word) lane.
// Copy rn into tmp1.
tmp1 := m.copyToTmp(rn.reg())
// And do the logical right shift by 32-bit on tmp1, which makes tmp1 = [0, p1_high, 0, p2_high]
shift := m.allocateInstr()
shift.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp1)
m.insert(shift)
// Execute "pmuludq rm,tmp1", which makes tmp1 = [p1_high*q1_lo, p2_high*q2_lo] where each lane is 64-bit.
mul := m.allocateInstr()
mul.asXmmRmR(sseOpcodePmuludq, rm, tmp1)
m.insert(mul)
// Copy rm value into tmp2.
tmp2 := m.copyToTmp(rm.reg())
// And do the logical right shift by 32-bit on tmp2, which makes tmp2 = [0, q1_high, 0, q2_high]
shift2 := m.allocateInstr()
shift2.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp2)
m.insert(shift2)
// Execute "pmuludq rm,tmp2", which makes tmp2 = [p1_lo*q1_high, p2_lo*q2_high] where each lane is 64-bit.
mul2 := m.allocateInstr()
mul2.asXmmRmR(sseOpcodePmuludq, rn, tmp2)
m.insert(mul2)
// Adds tmp1 and tmp2 and do the logical left shift by 32-bit,
// which makes tmp1 = [(p1_lo*q1_high+p1_high*q1_lo)<<32, (p2_lo*q2_high+p2_high*q2_lo)<<32]
add := m.allocateInstr()
add.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp2), tmp1)
m.insert(add)
shift3 := m.allocateInstr()
shift3.asXmmRmiReg(sseOpcodePsllq, newOperandImm32(32), tmp1)
m.insert(shift3)
// Copy rm value into tmp3.
tmp3 := m.copyToTmp(rm.reg())
// "pmuludq rm,tmp3" makes tmp3 = [p1_lo*q1_lo, p2_lo*q2_lo] where each lane is 64-bit.
mul3 := m.allocateInstr()
mul3.asXmmRmR(sseOpcodePmuludq, rn, tmp3)
m.insert(mul3)
// Finally, we get the result by computing tmp1 + tmp3,
// which makes tmp1 = [(p1_lo*q1_high+p1_high*q1_lo)<<32+p1_lo*q1_lo, (p2_lo*q2_high+p2_high*q2_lo)<<32+p2_lo*q2_lo]
add2 := m.allocateInstr()
add2.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp3), tmp1)
m.insert(add2)
m.copyTo(tmp1, rd)
} else {
var vecOp sseOpcode
switch lane {
case ssa.VecLaneI16x8:
vecOp = sseOpcodePmullw
case ssa.VecLaneI32x4:
vecOp = sseOpcodePmulld
default:
panic("unsupported: " + lane.String())
}
m.lowerVbBinOp(vecOp, x, y, instr.Return())
}
}
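// The i64x2 multiply above is the schoolbook decomposition built from PMULUDQ (unsigned 32x32->64):
//    p*q mod 2^64 = p_lo*q_lo + ((p_lo*q_hi + p_hi*q_lo) << 32)
// since the p_hi*q_hi term is shifted out entirely. A scalar sketch of the same identity
// (illustrative only; Go's uint64 arithmetic wraps just like the 64-bit lanes do):
func exampleMul64ViaHalves(p, q uint64) uint64 {
    pLo, pHi := p&0xffffffff, p>>32
    qLo, qHi := q&0xffffffff, q>>32
    cross := (pLo*qHi + pHi*qLo) << 32
    return pLo*qLo + cross // == p * q
}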

View File

@@ -0,0 +1,346 @@
package amd64
import (
"fmt"
"unsafe"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
type operand struct {
kind operandKind
data uint64
}
type operandKind byte
const (
// operandKindReg is an operand which is an integer Register.
operandKindReg operandKind = iota + 1
// operandKindMem is a value in Memory.
// 32, 64, or 128 bit value.
operandKindMem
// operandKindImm32 is a signed-32-bit integer immediate value.
operandKindImm32
// operandKindLabel is a label.
operandKindLabel
)
// String implements fmt.Stringer.
func (o operandKind) String() string {
switch o {
case operandKindReg:
return "reg"
case operandKindMem:
return "mem"
case operandKindImm32:
return "imm32"
case operandKindLabel:
return "label"
default:
panic("BUG: invalid operand kind")
}
}
// format returns the string representation of the operand.
// _64 is only for the case where the operand is a register, and it's integer.
func (o *operand) format(_64 bool) string {
switch o.kind {
case operandKindReg:
return formatVRegSized(o.reg(), _64)
case operandKindMem:
return o.addressMode().String()
case operandKindImm32:
return fmt.Sprintf("$%d", int32(o.imm32()))
case operandKindLabel:
return backend.Label(o.imm32()).String()
default:
panic(fmt.Sprintf("BUG: invalid operand: %s", o.kind))
}
}
//go:inline
func (o *operand) reg() regalloc.VReg {
return regalloc.VReg(o.data)
}
//go:inline
func (o *operand) setReg(r regalloc.VReg) {
o.data = uint64(r)
}
//go:inline
func (o *operand) addressMode() *amode {
return wazevoapi.PtrFromUintptr[amode](uintptr(o.data))
}
//go:inline
func (o *operand) imm32() uint32 {
return uint32(o.data)
}
func (o *operand) label() backend.Label {
switch o.kind {
case operandKindLabel:
return backend.Label(o.data)
case operandKindMem:
mem := o.addressMode()
if mem.kind() != amodeRipRel {
panic("BUG: invalid label")
}
return backend.Label(mem.imm32)
default:
panic("BUG: invalid operand kind")
}
}
func newOperandLabel(label backend.Label) operand {
return operand{kind: operandKindLabel, data: uint64(label)}
}
func newOperandReg(r regalloc.VReg) operand {
return operand{kind: operandKindReg, data: uint64(r)}
}
func newOperandImm32(imm32 uint32) operand {
return operand{kind: operandKindImm32, data: uint64(imm32)}
}
func newOperandMem(amode *amode) operand {
return operand{kind: operandKindMem, data: uint64(uintptr(unsafe.Pointer(amode)))}
}
// amode is a memory operand (addressing mode).
type amode struct {
kindWithShift uint32
imm32 uint32
base regalloc.VReg
// For amodeRegRegShift:
index regalloc.VReg
}
type amodeKind byte
const (
// amodeImmReg calculates sign-extend-32-to-64(Immediate) + base
amodeImmReg amodeKind = iota + 1
// amodeImmRBP is the same as amodeImmReg, but the base register is fixed to RBP.
// The only difference is that it doesn't tell the register allocator that RBP is used,
// which would only be distracting for the allocator.
amodeImmRBP
// amodeRegRegShift calculates sign-extend-32-to-64(Immediate) + base + (Register2 << Shift)
amodeRegRegShift
// amodeRipRel is a RIP-relative addressing mode specified by the label.
amodeRipRel
// TODO: there are other addressing modes such as the one without base register.
)
func (a *amode) kind() amodeKind {
return amodeKind(a.kindWithShift & 0xff)
}
func (a *amode) shift() byte {
return byte(a.kindWithShift >> 8)
}
func (a *amode) uses(rs *[]regalloc.VReg) {
switch a.kind() {
case amodeImmReg:
*rs = append(*rs, a.base)
case amodeRegRegShift:
*rs = append(*rs, a.base, a.index)
case amodeImmRBP, amodeRipRel:
default:
panic("BUG: invalid amode kind")
}
}
func (a *amode) nregs() int {
switch a.kind() {
case amodeImmReg:
return 1
case amodeRegRegShift:
return 2
case amodeImmRBP, amodeRipRel:
return 0
default:
panic("BUG: invalid amode kind")
}
}
func (a *amode) assignUses(i int, reg regalloc.VReg) {
switch a.kind() {
case amodeImmReg:
if i == 0 {
a.base = reg
} else {
panic("BUG: invalid amode assignment")
}
case amodeRegRegShift:
if i == 0 {
a.base = reg
} else if i == 1 {
a.index = reg
} else {
panic("BUG: invalid amode assignment")
}
default:
panic("BUG: invalid amode assignment")
}
}
func (m *machine) newAmodeImmReg(imm32 uint32, base regalloc.VReg) *amode {
ret := m.amodePool.Allocate()
*ret = amode{kindWithShift: uint32(amodeImmReg), imm32: imm32, base: base}
return ret
}
func (m *machine) newAmodeImmRBPReg(imm32 uint32) *amode {
ret := m.amodePool.Allocate()
*ret = amode{kindWithShift: uint32(amodeImmRBP), imm32: imm32, base: rbpVReg}
return ret
}
func (m *machine) newAmodeRegRegShift(imm32 uint32, base, index regalloc.VReg, shift byte) *amode {
if shift > 3 {
panic(fmt.Sprintf("BUG: invalid shift (must be 3>=): %d", shift))
}
ret := m.amodePool.Allocate()
*ret = amode{kindWithShift: uint32(amodeRegRegShift) | uint32(shift)<<8, imm32: imm32, base: base, index: index}
return ret
}
func (m *machine) newAmodeRipRel(label backend.Label) *amode {
ret := m.amodePool.Allocate()
*ret = amode{kindWithShift: uint32(amodeRipRel), imm32: uint32(label)}
return ret
}
// String implements fmt.Stringer.
func (a *amode) String() string {
switch a.kind() {
case amodeImmReg, amodeImmRBP:
if a.imm32 == 0 {
return fmt.Sprintf("(%s)", formatVRegSized(a.base, true))
}
return fmt.Sprintf("%d(%s)", int32(a.imm32), formatVRegSized(a.base, true))
case amodeRegRegShift:
shift := 1 << a.shift()
if a.imm32 == 0 {
return fmt.Sprintf(
"(%s,%s,%d)",
formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift)
}
return fmt.Sprintf(
"%d(%s,%s,%d)",
int32(a.imm32), formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift)
case amodeRipRel:
return fmt.Sprintf("%s(%%rip)", backend.Label(a.imm32))
default:
panic("BUG: invalid amode kind")
}
}
func (m *machine) getOperand_Mem_Reg(def *backend.SSAValueDefinition) (op operand) {
if def.IsFromBlockParam() {
return newOperandReg(def.BlkParamVReg)
}
if def.SSAValue().Type() == ssa.TypeV128 {
// SIMD instructions require strict memory alignment, so we don't support the memory operand for V128 at the moment.
return m.getOperand_Reg(def)
}
if m.c.MatchInstr(def, ssa.OpcodeLoad) {
instr := def.Instr
ptr, offset, _ := instr.LoadData()
op = newOperandMem(m.lowerToAddressMode(ptr, offset))
instr.MarkLowered()
return op
}
return m.getOperand_Reg(def)
}
func (m *machine) getOperand_Mem_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) {
if def.IsFromBlockParam() {
return newOperandReg(def.BlkParamVReg)
}
if m.c.MatchInstr(def, ssa.OpcodeLoad) {
instr := def.Instr
ptr, offset, _ := instr.LoadData()
op = newOperandMem(m.lowerToAddressMode(ptr, offset))
instr.MarkLowered()
return op
}
return m.getOperand_Imm32_Reg(def)
}
func (m *machine) getOperand_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) {
if def.IsFromBlockParam() {
return newOperandReg(def.BlkParamVReg)
}
instr := def.Instr
if instr.Constant() {
// If the operation is 64-bit, x64 sign-extends the 32-bit immediate value.
// Therefore, we can only use the immediate when the value fits in 32 bits and,
// unless the operation is 32-bit, its sign bit is not set.
if op, ok := asImm32Operand(instr.ConstantVal(), instr.Return().Type() == ssa.TypeI32); ok {
instr.MarkLowered()
return op
}
}
return m.getOperand_Reg(def)
}
func asImm32Operand(val uint64, allowSignExt bool) (operand, bool) {
if imm32, ok := asImm32(val, allowSignExt); ok {
return newOperandImm32(imm32), true
}
return operand{}, false
}
func asImm32(val uint64, allowSignExt bool) (uint32, bool) {
u32val := uint32(val)
if uint64(u32val) != val {
return 0, false
}
if !allowSignExt && u32val&0x80000000 != 0 {
return 0, false
}
return u32val, true
}
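// A few illustrative calls of asImm32 (values chosen only to show the sign-extension rule):
//
//    asImm32(0x7fff_ffff, false)  // -> 0x7fffffff, true
//    asImm32(0x8000_0000, false)  // -> 0, false: would sign-extend to 0xffffffff80000000 in a 64-bit op
//    asImm32(0x8000_0000, true)   // -> 0x80000000, true: harmless when the operation is 32-bit
//    asImm32(0x1_0000_0000, true) // -> 0, false: does not fit in 32 bits at all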
func (m *machine) getOperand_Reg(def *backend.SSAValueDefinition) (op operand) {
var v regalloc.VReg
if def.IsFromBlockParam() {
v = def.BlkParamVReg
} else {
instr := def.Instr
if instr.Constant() {
// We inline all the constant instructions so that we can reduce register usage.
v = m.lowerConstant(instr)
instr.MarkLowered()
} else {
if n := def.N; n == 0 {
v = m.c.VRegOf(instr.Return())
} else {
_, rs := instr.Returns()
v = m.c.VRegOf(rs[n-1])
}
}
}
return newOperandReg(v)
}

View File

@@ -0,0 +1,11 @@
//go:build !tinygo
package amd64
import "reflect"
// setSliceLimits sets both Cap and Len for the given reflected slice.
func setSliceLimits(s *reflect.SliceHeader, limit uintptr) {
s.Len = int(limit)
s.Cap = int(limit)
}

View File

@@ -0,0 +1,11 @@
//go:build tinygo
package amd64
import "reflect"
// setSliceLimits sets both Cap and Len for the given reflected slice.
func setSliceLimits(s *reflect.SliceHeader, limit uintptr) {
s.Len = limit
s.Cap = limit
}

View File

@@ -0,0 +1,181 @@
package amd64
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
)
// Amd64-specific registers.
const (
// rax is a gp register.
rax = regalloc.RealRegInvalid + 1 + iota
// rcx is a gp register.
rcx
// rdx is a gp register.
rdx
// rbx is a gp register.
rbx
// rsp is a gp register.
rsp
// rbp is a gp register.
rbp
// rsi is a gp register.
rsi
// rdi is a gp register.
rdi
// r8 is a gp register.
r8
// r9 is a gp register.
r9
// r10 is a gp register.
r10
// r11 is a gp register.
r11
// r12 is a gp register.
r12
// r13 is a gp register.
r13
// r14 is a gp register.
r14
// r15 is a gp register.
r15
// xmm0 is a vector register.
xmm0
// xmm1 is a vector register.
xmm1
// xmm2 is a vector register.
xmm2
// xmm3 is a vector register.
xmm3
// xmm4 is a vector register.
xmm4
// xmm5 is a vector register.
xmm5
// xmm6 is a vector register.
xmm6
// xmm7 is a vector register.
xmm7
// xmm8 is a vector register.
xmm8
// xmm9 is a vector register.
xmm9
// xmm10 is a vector register.
xmm10
// xmm11 is a vector register.
xmm11
// xmm12 is a vector register.
xmm12
// xmm13 is a vector register.
xmm13
// xmm14 is a vector register.
xmm14
// xmm15 is a vector register.
xmm15
)
var (
raxVReg = regalloc.FromRealReg(rax, regalloc.RegTypeInt)
rcxVReg = regalloc.FromRealReg(rcx, regalloc.RegTypeInt)
rdxVReg = regalloc.FromRealReg(rdx, regalloc.RegTypeInt)
rbxVReg = regalloc.FromRealReg(rbx, regalloc.RegTypeInt)
rspVReg = regalloc.FromRealReg(rsp, regalloc.RegTypeInt)
rbpVReg = regalloc.FromRealReg(rbp, regalloc.RegTypeInt)
rsiVReg = regalloc.FromRealReg(rsi, regalloc.RegTypeInt)
rdiVReg = regalloc.FromRealReg(rdi, regalloc.RegTypeInt)
r8VReg = regalloc.FromRealReg(r8, regalloc.RegTypeInt)
r9VReg = regalloc.FromRealReg(r9, regalloc.RegTypeInt)
r10VReg = regalloc.FromRealReg(r10, regalloc.RegTypeInt)
r11VReg = regalloc.FromRealReg(r11, regalloc.RegTypeInt)
r12VReg = regalloc.FromRealReg(r12, regalloc.RegTypeInt)
r13VReg = regalloc.FromRealReg(r13, regalloc.RegTypeInt)
r14VReg = regalloc.FromRealReg(r14, regalloc.RegTypeInt)
r15VReg = regalloc.FromRealReg(r15, regalloc.RegTypeInt)
xmm0VReg = regalloc.FromRealReg(xmm0, regalloc.RegTypeFloat)
xmm1VReg = regalloc.FromRealReg(xmm1, regalloc.RegTypeFloat)
xmm2VReg = regalloc.FromRealReg(xmm2, regalloc.RegTypeFloat)
xmm3VReg = regalloc.FromRealReg(xmm3, regalloc.RegTypeFloat)
xmm4VReg = regalloc.FromRealReg(xmm4, regalloc.RegTypeFloat)
xmm5VReg = regalloc.FromRealReg(xmm5, regalloc.RegTypeFloat)
xmm6VReg = regalloc.FromRealReg(xmm6, regalloc.RegTypeFloat)
xmm7VReg = regalloc.FromRealReg(xmm7, regalloc.RegTypeFloat)
xmm8VReg = regalloc.FromRealReg(xmm8, regalloc.RegTypeFloat)
xmm9VReg = regalloc.FromRealReg(xmm9, regalloc.RegTypeFloat)
xmm10VReg = regalloc.FromRealReg(xmm10, regalloc.RegTypeFloat)
xmm11VReg = regalloc.FromRealReg(xmm11, regalloc.RegTypeFloat)
xmm12VReg = regalloc.FromRealReg(xmm12, regalloc.RegTypeFloat)
xmm13VReg = regalloc.FromRealReg(xmm13, regalloc.RegTypeFloat)
xmm14VReg = regalloc.FromRealReg(xmm14, regalloc.RegTypeFloat)
xmm15VReg = regalloc.FromRealReg(xmm15, regalloc.RegTypeFloat)
)
var regNames = [...]string{
rax: "rax",
rcx: "rcx",
rdx: "rdx",
rbx: "rbx",
rsp: "rsp",
rbp: "rbp",
rsi: "rsi",
rdi: "rdi",
r8: "r8",
r9: "r9",
r10: "r10",
r11: "r11",
r12: "r12",
r13: "r13",
r14: "r14",
r15: "r15",
xmm0: "xmm0",
xmm1: "xmm1",
xmm2: "xmm2",
xmm3: "xmm3",
xmm4: "xmm4",
xmm5: "xmm5",
xmm6: "xmm6",
xmm7: "xmm7",
xmm8: "xmm8",
xmm9: "xmm9",
xmm10: "xmm10",
xmm11: "xmm11",
xmm12: "xmm12",
xmm13: "xmm13",
xmm14: "xmm14",
xmm15: "xmm15",
}
func formatVRegSized(r regalloc.VReg, _64 bool) string {
if r.IsRealReg() {
if r.RegType() == regalloc.RegTypeInt {
rr := r.RealReg()
orig := regNames[rr]
if rr <= rdi {
if _64 {
return "%" + orig
} else {
return "%e" + orig[1:]
}
} else {
if _64 {
return "%" + orig
} else {
return "%" + orig + "d"
}
}
} else {
return "%" + regNames[r.RealReg()]
}
} else {
if r.RegType() == regalloc.RegTypeInt {
if _64 {
return fmt.Sprintf("%%r%d?", r.ID())
} else {
return fmt.Sprintf("%%r%dd?", r.ID())
}
} else {
return fmt.Sprintf("%%xmm%d?", r.ID())
}
}
}
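// exampleGPName32 is an illustrative sketch, not part of the original file (the
// helper name is hypothetical): it condenses the 32-bit naming rule used above.
// The classic registers rax..rdi swap their leading "r" for an "e" prefix
// (rax -> eax), while r8..r15 keep their name and gain a "d" suffix (r8 -> r8d).
// It assumes only regNames and the register constants defined earlier in this file.
func exampleGPName32(rr regalloc.RealReg) string {
orig := regNames[rr]
if rr <= rdi {
return "e" + orig[1:] // e.g. rax -> eax, rsp -> esp
}
return orig + "d" // e.g. r8 -> r8d, r15 -> r15d
}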

View File

@ -0,0 +1,128 @@
package amd64
import (
"encoding/binary"
"reflect"
"unsafe"
"github.com/tetratelabs/wazero/internal/wasmdebug"
)
func stackView(rbp, top uintptr) []byte {
var stackBuf []byte
{
// TODO: use unsafe.Slice after floor version is set to Go 1.20.
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf))
hdr.Data = rbp
setSliceLimits(hdr, top-rbp)
}
return stackBuf
}
// UnwindStack implements wazevo.unwindStack.
func UnwindStack(_, rbp, top uintptr, returnAddresses []uintptr) []uintptr {
stackBuf := stackView(rbp, top)
for i := uint64(0); i < uint64(len(stackBuf)); {
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <---- Caller_RBP
// | ........... |
// | clobbered M |
// | ............ |
// | clobbered 0 |
// | spill slot N |
// | ............ |
// | spill slot 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <---- RBP
// (low address)
callerRBP := binary.LittleEndian.Uint64(stackBuf[i:])
retAddr := binary.LittleEndian.Uint64(stackBuf[i+8:])
returnAddresses = append(returnAddresses, uintptr(retAddr))
i = callerRBP - uint64(rbp)
if len(returnAddresses) == wasmdebug.MaxFrames {
break
}
}
return returnAddresses
}
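// unwindExampleSketch is an illustrative sketch, not part of the original file
// (the helper name is hypothetical): it performs the same frame-pointer walk as
// UnwindStack above, but over a plain []uint64 view for clarity. base is the
// address corresponding to stack[0], and rbp is the current frame pointer; each
// frame stores the caller's RBP at [RBP] and the return address at [RBP+8].
func unwindExampleSketch(stack []uint64, base, rbp uint64, max int) []uintptr {
var ret []uintptr
for i := (rbp - base) / 8; int(i)+1 < len(stack) && len(ret) < max; {
callerRBP := stack[i] // [RBP] holds the caller's RBP.
ret = append(ret, uintptr(stack[i+1])) // [RBP+8] holds the return address.
if callerRBP == 0 {
break // Reached the bottom of the chain.
}
i = (callerRBP - base) / 8 // Hop to the caller's frame.
}
return ret
}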
// GoCallStackView implements wazevo.goCallStackView.
func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
// (high address)
// +-----------------+ <----+
// | xxxxxxxxxxx | | ;; optional unused space to make it 16-byte aligned.
// ^ | arg[N]/ret[M] | |
// sliceSize | | ............ | | SizeInBytes/8
// | | arg[1]/ret[1] | |
// v | arg[0]/ret[0] | <----+
// | SizeInBytes |
// +-----------------+ <---- stackPointerBeforeGoCall
// (low address)
data := unsafe.Pointer(uintptr(unsafe.Pointer(stackPointerBeforeGoCall)) + 8)
size := *stackPointerBeforeGoCall / 8
return unsafe.Slice((*uint64)(data), int(size))
}
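// goCallStackViewExample is an illustrative sketch, not part of the original file
// (the helper name is hypothetical): it expresses the same size-prefixed layout in
// safe Go. It assumes stack[0] holds SizeInBytes and that the arg/ret slots follow
// it immediately, exactly as drawn in the diagram above.
func goCallStackViewExample(stack []uint64) []uint64 {
words := stack[0] / 8 // number of arg/ret slots
return stack[1 : 1+words] // the slots start right above the size word
}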
func AdjustClonedStack(oldRsp, oldTop, rsp, rbp, top uintptr) {
diff := uint64(rsp - oldRsp)
newBuf := stackView(rbp, top)
for i := uint64(0); i < uint64(len(newBuf)); {
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <---- Caller_RBP
// | ........... |
// | clobbered M |
// | ............ |
// | clobbered 0 |
// | spill slot N |
// | ............ |
// | spill slot 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <---- RBP
// (low address)
callerRBP := binary.LittleEndian.Uint64(newBuf[i:])
if callerRBP == 0 {
// End of stack.
break
}
if i64 := int64(callerRBP); i64 < int64(oldRsp) || i64 >= int64(oldTop) {
panic("BUG: callerRBP is out of range")
}
if int(callerRBP) < 0 {
panic("BUG: callerRBP is negative")
}
adjustedCallerRBP := callerRBP + diff
if int(adjustedCallerRBP) < 0 {
panic("BUG: adjustedCallerRBP is negative")
}
binary.LittleEndian.PutUint64(newBuf[i:], adjustedCallerRBP)
i = adjustedCallerRBP - uint64(rbp)
}
}
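// rebaseSavedRBPExample is an illustrative sketch, not part of the original file
// (the helper name is hypothetical): rebasing one saved frame pointer after the
// Go runtime has cloned the stack is just adding the displacement between the old
// and new stacks, which is what the `callerRBP + diff` step above does.
func rebaseSavedRBPExample(savedRBP, oldRsp, rsp uint64) uint64 {
return savedRBP + (rsp - oldRsp) // same displacement applied by AdjustClonedStack
}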

View File

@ -0,0 +1,332 @@
package arm64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// References:
// * https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#arm64-architecture
// * https://developer.arm.com/documentation/102374/0101/Procedure-Call-Standard
var (
intParamResultRegs = []regalloc.RealReg{x0, x1, x2, x3, x4, x5, x6, x7}
floatParamResultRegs = []regalloc.RealReg{v0, v1, v2, v3, v4, v5, v6, v7}
)
var regInfo = &regalloc.RegisterInfo{
AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
// We don't allocate:
// - x18: Reserved by macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers
// - x28: Reserved by Go runtime.
// - x27(=tmpReg): because of the reason described on tmpReg.
regalloc.RegTypeInt: {
x8, x9, x10, x11, x12, x13, x14, x15,
x16, x17, x19, x20, x21, x22, x23, x24, x25,
x26, x29, x30,
// These are the argument/return registers. Less preferred in the allocation.
x7, x6, x5, x4, x3, x2, x1, x0,
},
regalloc.RegTypeFloat: {
v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
// These are the argument/return registers. Less preferred in the allocation.
v7, v6, v5, v4, v3, v2, v1, v0,
},
},
CalleeSavedRegisters: regalloc.NewRegSet(
x19, x20, x21, x22, x23, x24, x25, x26, x28,
v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31,
),
CallerSavedRegisters: regalloc.NewRegSet(
x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x29, x30,
v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17,
),
RealRegToVReg: []regalloc.VReg{
x0: x0VReg, x1: x1VReg, x2: x2VReg, x3: x3VReg, x4: x4VReg, x5: x5VReg, x6: x6VReg, x7: x7VReg, x8: x8VReg, x9: x9VReg, x10: x10VReg, x11: x11VReg, x12: x12VReg, x13: x13VReg, x14: x14VReg, x15: x15VReg, x16: x16VReg, x17: x17VReg, x18: x18VReg, x19: x19VReg, x20: x20VReg, x21: x21VReg, x22: x22VReg, x23: x23VReg, x24: x24VReg, x25: x25VReg, x26: x26VReg, x27: x27VReg, x28: x28VReg, x29: x29VReg, x30: x30VReg,
v0: v0VReg, v1: v1VReg, v2: v2VReg, v3: v3VReg, v4: v4VReg, v5: v5VReg, v6: v6VReg, v7: v7VReg, v8: v8VReg, v9: v9VReg, v10: v10VReg, v11: v11VReg, v12: v12VReg, v13: v13VReg, v14: v14VReg, v15: v15VReg, v16: v16VReg, v17: v17VReg, v18: v18VReg, v19: v19VReg, v20: v20VReg, v21: v21VReg, v22: v22VReg, v23: v23VReg, v24: v24VReg, v25: v25VReg, v26: v26VReg, v27: v27VReg, v28: v28VReg, v29: v29VReg, v30: v30VReg, v31: v31VReg,
},
RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
RealRegType: func(r regalloc.RealReg) regalloc.RegType {
if r < v0 {
return regalloc.RegTypeInt
}
return regalloc.RegTypeFloat
},
}
// ArgsResultsRegs implements backend.Machine.
func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) {
return intParamResultRegs, floatParamResultRegs
}
// LowerParams implements backend.FunctionABI.
func (m *machine) LowerParams(args []ssa.Value) {
a := m.currentABI
for i, ssaArg := range args {
if !ssaArg.Valid() {
continue
}
reg := m.compiler.VRegOf(ssaArg)
arg := &a.Args[i]
if arg.Kind == backend.ABIArgKindReg {
m.InsertMove(reg, arg.Reg, arg.Type)
} else {
// TODO: we could use pair load if there's consecutive loads for the same type.
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 | <-|
// | ReturnAddress | |
// +-----------------+ |
// | ........... | |
// | clobbered M | | argStackOffset: is unknown at this point of compilation.
// | ............ | |
// | clobbered 0 | |
// | spill slot N | |
// | ........... | |
// | spill slot 0 | |
// SP---> +-----------------+ <-+
// (low address)
bits := arg.Type.Bits()
// At this point of compilation, we don't yet know how much space exists below the return address.
// So we instruct the address mode to add the `argStackOffset` to the offset at a later phase of compilation.
amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
load := m.allocateInstr()
switch arg.Type {
case ssa.TypeI32, ssa.TypeI64:
load.asULoad(operandNR(reg), amode, bits)
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
load.asFpuLoad(operandNR(reg), amode, bits)
default:
panic("BUG")
}
m.insert(load)
m.unresolvedAddressModes = append(m.unresolvedAddressModes, load)
}
}
}
// LowerReturns lowers the given returns.
func (m *machine) LowerReturns(rets []ssa.Value) {
a := m.currentABI
l := len(rets) - 1
for i := range rets {
// Iterate in reverse order so that moves into the return registers do not clobber values still needed for the stack-bound returns.
ret := rets[l-i]
r := &a.Rets[l-i]
reg := m.compiler.VRegOf(ret)
if def := m.compiler.ValueDefinition(ret); def.IsFromInstr() {
// Constant instructions are inlined.
if inst := def.Instr; inst.Constant() {
val := inst.Return()
valType := val.Type()
v := inst.ConstantVal()
m.insertLoadConstant(v, valType, reg)
}
}
if r.Kind == backend.ABIArgKindReg {
m.InsertMove(r.Reg, reg, ret.Type())
} else {
// TODO: we could use pair store if there's consecutive stores for the same type.
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 | <-+
// | arg X | |
// | ....... | |
// | arg 1 | |
// | arg 0 | |
// | ReturnAddress | |
// +-----------------+ |
// | ........... | |
// | spill slot M | | retStackOffset: is unknown at this point of compilation.
// | ............ | |
// | spill slot 2 | |
// | spill slot 1 | |
// | clobbered 0 | |
// | clobbered 1 | |
// | ........... | |
// | clobbered N | |
// SP---> +-----------------+ <-+
// (low address)
bits := r.Type.Bits()
// At this point of compilation, we don't yet know how much space exists below the return address.
// So we instruct the address mode to add the `retStackOffset` to the offset at a later phase of compilation.
amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
store := m.allocateInstr()
store.asStore(operandNR(reg), amode, bits)
m.insert(store)
m.unresolvedAddressModes = append(m.unresolvedAddressModes, store)
}
}
}
// callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the
// caller side of the function call.
func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) {
arg := &a.Args[argIndex]
if def != nil && def.IsFromInstr() {
// Constant instructions are inlined.
if inst := def.Instr; inst.Constant() {
val := inst.Return()
valType := val.Type()
v := inst.ConstantVal()
m.insertLoadConstant(v, valType, reg)
}
}
if arg.Kind == backend.ABIArgKindReg {
m.InsertMove(arg.Reg, reg, arg.Type)
} else {
// TODO: we could use pair store if there's consecutive stores for the same type.
//
// Note that at this point, the stack pointer is already adjusted.
bits := arg.Type.Bits()
amode := m.resolveAddressModeForOffset(arg.Offset-slotBegin, bits, spVReg, false)
store := m.allocateInstr()
store.asStore(operandNR(reg), amode, bits)
m.insert(store)
}
}
func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex int, reg regalloc.VReg, slotBegin int64) {
r := &a.Rets[retIndex]
if r.Kind == backend.ABIArgKindReg {
m.InsertMove(reg, r.Reg, r.Type)
} else {
// TODO: we could use pair load if there's consecutive loads for the same type.
amode := m.resolveAddressModeForOffset(a.ArgStackSize+r.Offset-slotBegin, r.Type.Bits(), spVReg, false)
ldr := m.allocateInstr()
switch r.Type {
case ssa.TypeI32, ssa.TypeI64:
ldr.asULoad(operandNR(reg), amode, r.Type.Bits())
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits())
default:
panic("BUG")
}
m.insert(ldr)
}
}
func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) {
exct := m.executableContext
exct.PendingInstructions = exct.PendingInstructions[:0]
mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse)
for _, instr := range exct.PendingInstructions {
cur = linkInstr(cur, instr)
}
return cur, mode
}
func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode {
if rn.RegType() != regalloc.RegTypeInt {
panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64))
}
var amode addressMode
if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) {
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
} else if offsetFitsInAddressModeKindRegSignedImm9(offset) {
amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
} else {
var indexReg regalloc.VReg
if allowTmpRegUse {
m.lowerConstantI64(tmpRegVReg, offset)
indexReg = tmpRegVReg
} else {
indexReg = m.compiler.AllocateVReg(ssa.TypeI64)
m.lowerConstantI64(indexReg, offset)
}
amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}
}
return amode
}
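// addressModeKindForOffsetExample is an illustrative sketch, not part of the
// original file (the helper name is hypothetical, and addressModeKind is assumed
// to be the type of the addressModeKind* constants): it isolates the selection
// order used above so the fallback chain is easier to see. Unsigned imm12 is
// preferred, then signed imm9, and otherwise the offset is materialized into an
// index register.
func addressModeKindForOffsetExample(offset int64, dstBits byte) addressModeKind {
switch {
case offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset):
return addressModeKindRegUnsignedImm12
case offsetFitsInAddressModeKindRegSignedImm9(offset):
return addressModeKindRegSignedImm9
default:
return addressModeKindRegReg // the offset is loaded into a temporary register
}
}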
func (m *machine) lowerCall(si *ssa.Instruction) {
isDirectCall := si.Opcode() == ssa.OpcodeCall
var indirectCalleePtr ssa.Value
var directCallee ssa.FuncRef
var sigID ssa.SignatureID
var args []ssa.Value
if isDirectCall {
directCallee, sigID, args = si.CallData()
} else {
indirectCalleePtr, sigID, args, _ /* on arm64, the calling convention is compatible with the Go runtime */ = si.CallIndirectData()
}
calleeABI := m.compiler.GetFunctionABI(m.compiler.SSABuilder().ResolveSignature(sigID))
stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // return address frame.
}
for i, arg := range args {
reg := m.compiler.VRegOf(arg)
def := m.compiler.ValueDefinition(arg)
m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
}
if isDirectCall {
call := m.allocateInstr()
call.asCall(directCallee, calleeABI)
m.insert(call)
} else {
ptr := m.compiler.VRegOf(indirectCalleePtr)
callInd := m.allocateInstr()
callInd.asCallIndirect(ptr, calleeABI)
m.insert(callInd)
}
var index int
r1, rs := si.Returns()
if r1.Valid() {
m.callerGenFunctionReturnVReg(calleeABI, 0, m.compiler.VRegOf(r1), stackSlotSize)
index++
}
for _, r := range rs {
m.callerGenFunctionReturnVReg(calleeABI, index, m.compiler.VRegOf(r), stackSlotSize)
index++
}
}
func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) {
if imm12Operand, ok := asImm12Operand(uint64(diff)); ok {
alu := m.allocateInstr()
var ao aluOp
if add {
ao = aluOpAdd
} else {
ao = aluOpSub
}
alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true)
m.insert(alu)
} else {
m.lowerConstantI64(tmpRegVReg, diff)
alu := m.allocateInstr()
var ao aluOp
if add {
ao = aluOpAdd
} else {
ao = aluOpSub
}
alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true)
m.insert(alu)
}
}

View File

@ -0,0 +1,9 @@
package arm64
// entrypoint enters the machine code generated by this backend which begins with the preamble generated by functionABI.EmitGoEntryPreamble below.
// This implements wazevo.entrypoint, and see the comments there for detail.
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr)
// afterGoFunctionCallEntrypoint enters the machine code after growing the stack.
// This implements wazevo.afterGoFunctionCallEntrypoint, and see the comments there for detail.
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)

View File

@ -0,0 +1,29 @@
//go:build arm64
#include "funcdata.h"
#include "textflag.h"
// See the comments on EmitGoEntryPreamble for what this function is supposed to do.
TEXT ·entrypoint(SB), NOSPLIT|NOFRAME, $0-48
MOVD preambleExecutable+0(FP), R27
MOVD functionExecutable+8(FP), R24
MOVD executionContextPtr+16(FP), R0
MOVD moduleContextPtr+24(FP), R1
MOVD paramResultSlicePtr+32(FP), R19
MOVD goAllocatedStackSlicePtr+40(FP), R26
JMP (R27)
TEXT ·afterGoFunctionCallEntrypoint(SB), NOSPLIT|NOFRAME, $0-32
MOVD goCallReturnAddress+0(FP), R20
MOVD executionContextPtr+8(FP), R0
MOVD stackPointer+16(FP), R19
// Save the current FP(R29), SP and LR(R30) into the wazevo.executionContext (stored in R0).
MOVD R29, 16(R0) // Store FP(R29) into [R0, #ExecutionContextOffsets.OriginalFramePointer]
MOVD RSP, R27 // Move SP to R27 (temporary register) since SP cannot be stored directly in str instructions.
MOVD R27, 24(R0) // Store the original SP (R27) into [R0, #ExecutionContextOffsets.OriginalStackPointer]
MOVD R30, 32(R0) // Store R30 into [R0, #ExecutionContextOffsets.GoReturnAddress]
// Load the new stack pointer (which sits somewhere in Go-allocated stack) into SP.
MOVD R19, RSP
JMP (R20)

View File

@ -0,0 +1,230 @@
package arm64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
// CompileEntryPreamble implements backend.Machine. This assumes `entrypoint` function (in abi_go_entry_arm64.s) passes:
//
// 1. The first (execution context ptr) and second (module context ptr) arguments are already passed in x0 and x1.
// 2. param/result slice ptr in x19; the pointer to []uint64{} which is used to pass arguments and accept return values.
// 3. Go-allocated stack slice ptr in x26.
// 4. Function executable in x24.
//
// also SP and FP are correct Go-runtime-based values, and LR is the return address to the Go-side caller.
func (m *machine) CompileEntryPreamble(signature *ssa.Signature) []byte {
root := m.constructEntryPreamble(signature)
m.encode(root)
return m.compiler.Buf()
}
var (
executionContextPtrReg = x0VReg
// callee-saved regs so that they can be used in the prologue and epilogue.
paramResultSlicePtr = x19VReg
savedExecutionContextPtr = x20VReg
// goAllocatedStackPtr is not used in the epilogue.
goAllocatedStackPtr = x26VReg
// paramResultSliceCopied is not used in the epilogue.
paramResultSliceCopied = x25VReg
// functionExecutable is not used in the epilogue.
functionExecutable = x24VReg
)
func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regalloc.VReg, arg *backend.ABIArg, argStartOffsetFromSP int64) *instruction {
typ := arg.Type
bits := typ.Bits()
isStackArg := arg.Kind == backend.ABIArgKindStack
var loadTargetReg operand
if !isStackArg {
loadTargetReg = operandNR(arg.Reg)
} else {
switch typ {
case ssa.TypeI32, ssa.TypeI64:
loadTargetReg = operandNR(x15VReg)
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
loadTargetReg = operandNR(v15VReg)
default:
panic("TODO?")
}
}
var postIndexImm int64
if typ == ssa.TypeV128 {
postIndexImm = 16 // v128 is represented as 2x64-bit in Go slice.
} else {
postIndexImm = 8
}
loadMode := addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm}
instr := m.allocateInstr()
switch typ {
case ssa.TypeI32:
instr.asULoad(loadTargetReg, loadMode, 32)
case ssa.TypeI64:
instr.asULoad(loadTargetReg, loadMode, 64)
case ssa.TypeF32:
instr.asFpuLoad(loadTargetReg, loadMode, 32)
case ssa.TypeF64:
instr.asFpuLoad(loadTargetReg, loadMode, 64)
case ssa.TypeV128:
instr.asFpuLoad(loadTargetReg, loadMode, 128)
}
cur = linkInstr(cur, instr)
if isStackArg {
var storeMode addressMode
cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true)
toStack := m.allocateInstr()
toStack.asStore(loadTargetReg, storeMode, bits)
cur = linkInstr(cur, toStack)
}
return cur
}
func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr regalloc.VReg, result *backend.ABIArg, resultStartOffsetFromSP int64) *instruction {
isStackArg := result.Kind == backend.ABIArgKindStack
typ := result.Type
bits := typ.Bits()
var storeTargetReg operand
if !isStackArg {
storeTargetReg = operandNR(result.Reg)
} else {
switch typ {
case ssa.TypeI32, ssa.TypeI64:
storeTargetReg = operandNR(x15VReg)
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
storeTargetReg = operandNR(v15VReg)
default:
panic("TODO?")
}
}
var postIndexImm int64
if typ == ssa.TypeV128 {
postIndexImm = 16 // v128 is represented as 2x64-bit in Go slice.
} else {
postIndexImm = 8
}
if isStackArg {
var loadMode addressMode
cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true)
toReg := m.allocateInstr()
switch typ {
case ssa.TypeI32, ssa.TypeI64:
toReg.asULoad(storeTargetReg, loadMode, bits)
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
toReg.asFpuLoad(storeTargetReg, loadMode, bits)
default:
panic("TODO?")
}
cur = linkInstr(cur, toReg)
}
mode := addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm}
instr := m.allocateInstr()
instr.asStore(storeTargetReg, mode, bits)
cur = linkInstr(cur, instr)
return cur
}
func (m *machine) constructEntryPreamble(sig *ssa.Signature) (root *instruction) {
abi := backend.FunctionABI{}
abi.Init(sig, intParamResultRegs, floatParamResultRegs)
root = m.allocateNop()
//// ----------------------------------- prologue ----------------------------------- ////
// First, we save executionContextPtrReg into a callee-saved register so that it can be used in epilogue as well.
// mov savedExecutionContextPtr, x0
cur := m.move64(savedExecutionContextPtr, executionContextPtrReg, root)
// Next, save the current FP, SP and LR into the wazevo.executionContext:
// str fp, [savedExecutionContextPtr, #OriginalFramePointer]
// mov tmp, sp ;; sp cannot be str'ed directly.
// str sp, [savedExecutionContextPtr, #OriginalStackPointer]
// str lr, [savedExecutionContextPtr, #GoReturnAddress]
cur = m.loadOrStoreAtExecutionContext(fpVReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, true, cur)
cur = m.move64(tmpRegVReg, spVReg, cur)
cur = m.loadOrStoreAtExecutionContext(tmpRegVReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, true, cur)
cur = m.loadOrStoreAtExecutionContext(lrVReg, wazevoapi.ExecutionContextOffsetGoReturnAddress, true, cur)
// Then, move the Go-allocated stack pointer to SP:
// mov sp, goAllocatedStackPtr
cur = m.move64(spVReg, goAllocatedStackPtr, cur)
prReg := paramResultSlicePtr
if len(abi.Args) > 2 && len(abi.Rets) > 0 {
// paramResultSlicePtr is modified during the execution of goEntryPreamblePassArg,
// so copy it to another reg.
cur = m.move64(paramResultSliceCopied, paramResultSlicePtr, cur)
prReg = paramResultSliceCopied
}
stackSlotSize := int64(abi.AlignedArgResultStackSlotSize())
for i := range abi.Args {
if i < 2 {
// execution context ptr and module context ptr are passed in x0 and x1 by the Go assembly function.
continue
}
arg := &abi.Args[i]
cur = m.goEntryPreamblePassArg(cur, prReg, arg, -stackSlotSize)
}
// Call the real function.
bl := m.allocateInstr()
bl.asCallIndirect(functionExecutable, &abi)
cur = linkInstr(cur, bl)
///// ----------------------------------- epilogue ----------------------------------- /////
// Store the register results into paramResultSlicePtr.
for i := range abi.Rets {
cur = m.goEntryPreamblePassResult(cur, paramResultSlicePtr, &abi.Rets[i], abi.ArgStackSize-stackSlotSize)
}
// Finally, restore the FP, SP and LR, and return to the Go code.
// ldr fp, [savedExecutionContextPtr, #OriginalFramePointer]
// ldr tmp, [savedExecutionContextPtr, #OriginalStackPointer]
// mov sp, tmp ;; sp cannot be str'ed directly.
// ldr lr, [savedExecutionContextPtr, #GoReturnAddress]
// ret ;; --> return to the Go code
cur = m.loadOrStoreAtExecutionContext(fpVReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, false, cur)
cur = m.loadOrStoreAtExecutionContext(tmpRegVReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, false, cur)
cur = m.move64(spVReg, tmpRegVReg, cur)
cur = m.loadOrStoreAtExecutionContext(lrVReg, wazevoapi.ExecutionContextOffsetGoReturnAddress, false, cur)
retInst := m.allocateInstr()
retInst.asRet()
linkInstr(cur, retInst)
return
}
func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction {
instr := m.allocateInstr()
instr.asMove64(dst, src)
return linkInstr(prev, instr)
}
func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction {
instr := m.allocateInstr()
mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()}
if store {
instr.asStore(operandNR(d), mode, 64)
} else {
instr.asULoad(operandNR(d), mode, 64)
}
return linkInstr(prev, instr)
}
func linkInstr(prev, next *instruction) *instruction {
prev.next = next
next.prev = prev
return next
}

View File

@ -0,0 +1,428 @@
package arm64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
var calleeSavedRegistersSorted = []regalloc.VReg{
x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg,
v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg,
}
// CompileGoFunctionTrampoline implements backend.Machine.
func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte {
exct := m.executableContext
argBegin := 1 // Skips exec context by default.
if needModuleContextPtr {
argBegin++
}
abi := &backend.FunctionABI{}
abi.Init(sig, intParamResultRegs, floatParamResultRegs)
m.currentABI = abi
cur := m.allocateInstr()
cur.asNop0()
exct.RootInstr = cur
// Execution context is always the first argument.
execCtrPtr := x0VReg
// In the following, we create the following stack layout:
//
// (high address)
// SP ------> +-----------------+ <----+
// | ....... | |
// | ret Y | |
// | ....... | |
// | ret 0 | |
// | arg X | | size_of_arg_ret
// | ....... | |
// | arg 1 | |
// | arg 0 | <----+ <-------- originalArg0Reg
// | size_of_arg_ret |
// | ReturnAddress |
// +-----------------+ <----+
// | xxxx | | ;; might be padded to make it 16-byte aligned.
// +--->| arg[N]/ret[M] | |
// sliceSize| | ............ | | goCallStackSize
// | | arg[1]/ret[1] | |
// +--->| arg[0]/ret[0] | <----+ <-------- arg0ret0AddrReg
// | sliceSize |
// | frame_size |
// +-----------------+
// (low address)
//
// where the region of "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go functions,
// therefore will be accessed as the usual []uint64. So that's where we need to pass/receive
// the arguments/return values.
// First of all, update the SP and create the "ReturnAddress + size_of_arg_ret" slot.
cur = m.createReturnAddrAndSizeOfArgRetSlot(cur)
const frameInfoSize = 16 // == frame_size + sliceSize.
// Next, we should allocate the stack for the Go function call if necessary.
goCallStackSize, sliceSizeInBytes := backend.GoFunctionCallRequiredStackSize(sig, argBegin)
cur = m.insertStackBoundsCheck(goCallStackSize+frameInfoSize, cur)
originalArg0Reg := x17VReg // Caller save, so we can use it for whatever we want.
if m.currentABI.AlignedArgResultStackSlotSize() > 0 {
// At this point, SP points to `ReturnAddress`, so add 16 to get the original arg 0 slot.
cur = m.addsAddOrSubStackPointer(cur, originalArg0Reg, frameInfoSize, true)
}
// Save the callee saved registers.
cur = m.saveRegistersInExecutionContext(cur, calleeSavedRegistersSorted)
if needModuleContextPtr {
offset := wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.I64()
if !offsetFitsInAddressModeKindRegUnsignedImm12(64, offset) {
panic("BUG: too large or un-aligned offset for goFunctionCallCalleeModuleContextOpaque in execution context")
}
// Module context is always the second argument.
moduleCtrPtr := x1VReg
store := m.allocateInstr()
amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset}
store.asStore(operandNR(moduleCtrPtr), amode, 64)
cur = linkInstr(cur, store)
}
// Advances the stack pointer.
cur = m.addsAddOrSubStackPointer(cur, spVReg, goCallStackSize, false)
// Copy the pointer to x15VReg.
arg0ret0AddrReg := x15VReg // Caller save, so we can use it for whatever we want.
copySp := m.allocateInstr()
copySp.asMove64(arg0ret0AddrReg, spVReg)
cur = linkInstr(cur, copySp)
// Next, we need to store all the arguments to the stack in the typical Wasm stack style.
for i := range abi.Args[argBegin:] {
arg := &abi.Args[argBegin+i]
store := m.allocateInstr()
var v regalloc.VReg
if arg.Kind == backend.ABIArgKindReg {
v = arg.Reg
} else {
cur, v = m.goFunctionCallLoadStackArg(cur, originalArg0Reg, arg,
// Caller save, so we can use it for whatever we want.
x11VReg, v11VReg)
}
var sizeInBits byte
if arg.Type == ssa.TypeV128 {
sizeInBits = 128
} else {
sizeInBits = 64
}
store.asStore(operandNR(v),
addressMode{
kind: addressModeKindPostIndex,
rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8),
}, sizeInBits)
cur = linkInstr(cur, store)
}
// Finally, now that we've advanced SP to arg[0]/ret[0], we allocate `frame_size + sliceSize`.
var frameSizeReg, sliceSizeReg regalloc.VReg
if goCallStackSize > 0 {
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, goCallStackSize)
frameSizeReg = tmpRegVReg
cur = m.lowerConstantI64AndInsert(cur, x16VReg, sliceSizeInBytes/8)
sliceSizeReg = x16VReg
} else {
frameSizeReg = xzrVReg
sliceSizeReg = xzrVReg
}
_amode := addressModePreOrPostIndex(spVReg, -16, true)
storeP := m.allocateInstr()
storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode)
cur = linkInstr(cur, storeP)
// Set the exit status on the execution context.
cur = m.setExitCode(cur, x0VReg, exitCode)
// Save the current stack pointer.
cur = m.saveCurrentStackPointer(cur, x0VReg)
// Exit the execution.
cur = m.storeReturnAddressAndExit(cur)
// After the call, we need to restore the callee saved registers.
cur = m.restoreRegistersInExecutionContext(cur, calleeSavedRegistersSorted)
// Get the pointer to the arg[0]/ret[0]: We need to skip `frame_size + sliceSize`.
if len(abi.Rets) > 0 {
cur = m.addsAddOrSubStackPointer(cur, arg0ret0AddrReg, frameInfoSize, true)
}
// Advances the SP so that it points to `ReturnAddress`.
cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true)
ldr := m.allocateInstr()
// And load the return address.
ldr.asULoad(operandNR(lrVReg),
addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
cur = linkInstr(cur, ldr)
originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want.
if m.currentABI.RetStackSize > 0 {
cur = m.addsAddOrSubStackPointer(cur, originalRet0Reg, m.currentABI.ArgStackSize, true)
}
// Make the SP point to the original address (above the result slot).
if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 {
cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
}
for i := range abi.Rets {
r := &abi.Rets[i]
if r.Kind == backend.ABIArgKindReg {
loadIntoReg := m.allocateInstr()
mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
switch r.Type {
case ssa.TypeI32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoReg.asULoad(operandNR(r.Reg), mode, 32)
case ssa.TypeI64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoReg.asULoad(operandNR(r.Reg), mode, 64)
case ssa.TypeF32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32)
case ssa.TypeF64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64)
case ssa.TypeV128:
mode.imm = 16
loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128)
default:
panic("TODO")
}
cur = linkInstr(cur, loadIntoReg)
} else {
// First we need to load the value to a temporary just like ^^.
intTmp, floatTmp := x11VReg, v11VReg
loadIntoTmpReg := m.allocateInstr()
mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
var resultReg regalloc.VReg
switch r.Type {
case ssa.TypeI32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32)
resultReg = intTmp
case ssa.TypeI64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64)
resultReg = intTmp
case ssa.TypeF32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32)
resultReg = floatTmp
case ssa.TypeF64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64)
resultReg = floatTmp
case ssa.TypeV128:
mode.imm = 16
loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128)
resultReg = floatTmp
default:
panic("TODO")
}
cur = linkInstr(cur, loadIntoTmpReg)
cur = m.goFunctionCallStoreStackResult(cur, originalRet0Reg, r, resultReg)
}
}
ret := m.allocateInstr()
ret.asRet()
linkInstr(cur, ret)
m.encode(m.executableContext.RootInstr)
return m.compiler.Buf()
}
func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction {
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
for _, v := range regs {
store := m.allocateInstr()
var sizeInBits byte
switch v.RegType() {
case regalloc.RegTypeInt:
sizeInBits = 64
case regalloc.RegTypeFloat:
sizeInBits = 128
}
store.asStore(operandNR(v),
addressMode{
kind: addressModeKindRegUnsignedImm12,
// Execution context is always the first argument.
rn: x0VReg, imm: offset,
}, sizeInBits)
store.prev = cur
cur.next = store
cur = store
offset += 16 // Imm12 offsets must be 16-byte aligned for vector regs, so we unconditionally store each reg at an offset that is a multiple of 16.
}
return cur
}
func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction {
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
for _, v := range regs {
load := m.allocateInstr()
var as func(dst operand, amode addressMode, sizeInBits byte)
var sizeInBits byte
switch v.RegType() {
case regalloc.RegTypeInt:
as = load.asULoad
sizeInBits = 64
case regalloc.RegTypeFloat:
as = load.asFpuLoad
sizeInBits = 128
}
as(operandNR(v),
addressMode{
kind: addressModeKindRegUnsignedImm12,
// Execution context is always the first argument.
rn: x0VReg, imm: offset,
}, sizeInBits)
cur = linkInstr(cur, load)
offset += 16 // Imm12 offsets must be 16-byte aligned for vector regs, so we unconditionally load each reg from an offset that is a multiple of 16.
}
return cur
}
func (m *machine) lowerConstantI64AndInsert(cur *instruction, dst regalloc.VReg, v int64) *instruction {
exct := m.executableContext
exct.PendingInstructions = exct.PendingInstructions[:0]
m.lowerConstantI64(dst, v)
for _, instr := range exct.PendingInstructions {
cur = linkInstr(cur, instr)
}
return cur
}
func (m *machine) lowerConstantI32AndInsert(cur *instruction, dst regalloc.VReg, v int32) *instruction {
exct := m.executableContext
exct.PendingInstructions = exct.PendingInstructions[:0]
m.lowerConstantI32(dst, v)
for _, instr := range exct.PendingInstructions {
cur = linkInstr(cur, instr)
}
return cur
}
func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode wazevoapi.ExitCode) *instruction {
constReg := x17VReg // caller-saved, so we can use it.
cur = m.lowerConstantI32AndInsert(cur, constReg, int32(exitCode))
// Set the exit status on the execution context.
setExitStatus := m.allocateInstr()
setExitStatus.asStore(operandNR(constReg),
addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
}, 32)
cur = linkInstr(cur, setExitStatus)
return cur
}
func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction {
// Read the return address into tmp, and store it in the execution context.
adr := m.allocateInstr()
adr.asAdr(tmpRegVReg, exitSequenceSize+8)
cur = linkInstr(cur, adr)
storeReturnAddr := m.allocateInstr()
storeReturnAddr.asStore(operandNR(tmpRegVReg),
addressMode{
kind: addressModeKindRegUnsignedImm12,
// Execution context is always the first argument.
rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
}, 64)
cur = linkInstr(cur, storeReturnAddr)
// Exit the execution.
trapSeq := m.allocateInstr()
trapSeq.asExitSequence(x0VReg)
cur = linkInstr(cur, trapSeq)
return cur
}
func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VReg) *instruction {
// Save the current stack pointer:
// mov tmp, sp,
// str tmp, [exec_ctx, #stackPointerBeforeGoCall]
movSp := m.allocateInstr()
movSp.asMove64(tmpRegVReg, spVReg)
cur = linkInstr(cur, movSp)
strSp := m.allocateInstr()
strSp.asStore(operandNR(tmpRegVReg),
addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
}, 64)
cur = linkInstr(cur, strSp)
return cur
}
func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) {
load := m.allocateInstr()
var result regalloc.VReg
mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg}
switch arg.Type {
case ssa.TypeI32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
load.asULoad(operandNR(intVReg), mode, 32)
result = intVReg
case ssa.TypeI64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
load.asULoad(operandNR(intVReg), mode, 64)
result = intVReg
case ssa.TypeF32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
load.asFpuLoad(operandNR(floatVReg), mode, 32)
result = floatVReg
case ssa.TypeF64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
load.asFpuLoad(operandNR(floatVReg), mode, 64)
result = floatVReg
case ssa.TypeV128:
mode.imm = 16
load.asFpuLoad(operandNR(floatVReg), mode, 128)
result = floatVReg
default:
panic("TODO")
}
cur = linkInstr(cur, load)
return cur, result
}
func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction {
store := m.allocateInstr()
mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg}
var sizeInBits byte
switch result.Type {
case ssa.TypeI32, ssa.TypeF32:
mode.imm = 8
sizeInBits = 32
case ssa.TypeI64, ssa.TypeF64:
mode.imm = 8
sizeInBits = 64
case ssa.TypeV128:
mode.imm = 16
sizeInBits = 128
default:
panic("TODO")
}
store.asStore(operandNR(resultVReg), mode, sizeInBits)
return linkInstr(cur, store)
}

View File

@ -0,0 +1,215 @@
package arm64
import (
"strconv"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
type (
cond uint64
condKind byte
)
const (
// condKindRegisterZero represents a condition which checks if the register is zero.
// This indicates that the instruction must be encoded as CBZ:
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/CBZ--Compare-and-Branch-on-Zero-
condKindRegisterZero condKind = iota
// condKindRegisterNotZero indicates that the instruction must be encoded as CBNZ:
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/CBNZ--Compare-and-Branch-on-Nonzero-
condKindRegisterNotZero
// condKindCondFlagSet indicates that the instruction must be encoded as B.cond:
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally-
condKindCondFlagSet
)
// kind returns the kind of condition which is stored in the first two bits.
func (c cond) kind() condKind {
return condKind(c & 0b11)
}
func (c cond) asUint64() uint64 {
return uint64(c)
}
// register returns the register for register conditions.
// This panics if the condition is not a register condition (condKindRegisterZero or condKindRegisterNotZero).
func (c cond) register() regalloc.VReg {
if c.kind() != condKindRegisterZero && c.kind() != condKindRegisterNotZero {
panic("condition is not a register")
}
return regalloc.VReg(c >> 2)
}
func registerAsRegZeroCond(r regalloc.VReg) cond {
return cond(r)<<2 | cond(condKindRegisterZero)
}
func registerAsRegNotZeroCond(r regalloc.VReg) cond {
return cond(r)<<2 | cond(condKindRegisterNotZero)
}
func (c cond) flag() condFlag {
if c.kind() != condKindCondFlagSet {
panic("condition is not a flag")
}
return condFlag(c >> 2)
}
func (c condFlag) asCond() cond {
return cond(c)<<2 | cond(condKindCondFlagSet)
}
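// condRoundTripExample is an illustrative sketch, not part of the original file
// (the helper name is hypothetical): it shows that the kind lives in the low two
// bits and the payload (here a register) in the remaining bits, so encoding and
// decoding round-trip.
func condRoundTripExample(r regalloc.VReg) bool {
c := registerAsRegZeroCond(r)
return c.kind() == condKindRegisterZero && c.register() == r
}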
// condFlag represents a condition flag for conditional branches.
// The value matches the encoding of condition flags in the ARM64 instruction set.
// https://developer.arm.com/documentation/den0024/a/The-A64-instruction-set/Data-processing-instructions/Conditional-instructions
type condFlag uint8
const (
eq condFlag = iota // eq represents "equal"
ne // ne represents "not equal"
hs // hs represents "higher or same"
lo // lo represents "lower"
mi // mi represents "minus or negative result"
pl // pl represents "plus or positive result"
vs // vs represents "overflow set"
vc // vc represents "overflow clear"
hi // hi represents "higher"
ls // ls represents "lower or same"
ge // ge represents "greater or equal"
lt // lt represents "less than"
gt // gt represents "greater than"
le // le represents "less than or equal"
al // al represents "always"
nv // nv represents "never"
)
// invert returns the inverted condition.
func (c condFlag) invert() condFlag {
switch c {
case eq:
return ne
case ne:
return eq
case hs:
return lo
case lo:
return hs
case mi:
return pl
case pl:
return mi
case vs:
return vc
case vc:
return vs
case hi:
return ls
case ls:
return hi
case ge:
return lt
case lt:
return ge
case gt:
return le
case le:
return gt
case al:
return nv
case nv:
return al
default:
panic(c)
}
}
// String implements fmt.Stringer.
func (c condFlag) String() string {
switch c {
case eq:
return "eq"
case ne:
return "ne"
case hs:
return "hs"
case lo:
return "lo"
case mi:
return "mi"
case pl:
return "pl"
case vs:
return "vs"
case vc:
return "vc"
case hi:
return "hi"
case ls:
return "ls"
case ge:
return "ge"
case lt:
return "lt"
case gt:
return "gt"
case le:
return "le"
case al:
return "al"
case nv:
return "nv"
default:
panic(strconv.Itoa(int(c)))
}
}
// condFlagFromSSAIntegerCmpCond returns the condition flag for the given ssa.IntegerCmpCond.
func condFlagFromSSAIntegerCmpCond(c ssa.IntegerCmpCond) condFlag {
switch c {
case ssa.IntegerCmpCondEqual:
return eq
case ssa.IntegerCmpCondNotEqual:
return ne
case ssa.IntegerCmpCondSignedLessThan:
return lt
case ssa.IntegerCmpCondSignedGreaterThanOrEqual:
return ge
case ssa.IntegerCmpCondSignedGreaterThan:
return gt
case ssa.IntegerCmpCondSignedLessThanOrEqual:
return le
case ssa.IntegerCmpCondUnsignedLessThan:
return lo
case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual:
return hs
case ssa.IntegerCmpCondUnsignedGreaterThan:
return hi
case ssa.IntegerCmpCondUnsignedLessThanOrEqual:
return ls
default:
panic(c)
}
}
// condFlagFromSSAFloatCmpCond returns the condition flag for the given ssa.FloatCmpCond.
func condFlagFromSSAFloatCmpCond(c ssa.FloatCmpCond) condFlag {
switch c {
case ssa.FloatCmpCondEqual:
return eq
case ssa.FloatCmpCondNotEqual:
return ne
case ssa.FloatCmpCondLessThan:
return mi
case ssa.FloatCmpCondLessThanOrEqual:
return ls
case ssa.FloatCmpCondGreaterThan:
return gt
case ssa.FloatCmpCondGreaterThanOrEqual:
return ge
default:
panic(c)
}
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,301 @@
package arm64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// lowerConstant allocates a new VReg and inserts the instruction to load the constant value.
func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
val := instr.Return()
valType := val.Type()
vr = m.compiler.AllocateVReg(valType)
v := instr.ConstantVal()
m.insertLoadConstant(v, valType, vr)
return
}
// InsertLoadConstantBlockArg implements backend.Machine.
func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) {
val := instr.Return()
valType := val.Type()
v := instr.ConstantVal()
load := m.allocateInstr()
load.asLoadConstBlockArg(v, valType, vr)
m.insert(load)
}
func (m *machine) lowerLoadConstantBlockArgAfterRegAlloc(i *instruction) {
v, typ, dst := i.loadConstBlockArgData()
m.insertLoadConstant(v, typ, dst)
}
func (m *machine) insertLoadConstant(v uint64, valType ssa.Type, vr regalloc.VReg) {
if valType.Bits() < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc.
v = v & ((1 << valType.Bits()) - 1)
}
switch valType {
case ssa.TypeF32:
loadF := m.allocateInstr()
loadF.asLoadFpuConst32(vr, v)
m.insert(loadF)
case ssa.TypeF64:
loadF := m.allocateInstr()
loadF.asLoadFpuConst64(vr, v)
m.insert(loadF)
case ssa.TypeI32:
if v == 0 {
m.InsertMove(vr, xzrVReg, ssa.TypeI32)
} else {
m.lowerConstantI32(vr, int32(v))
}
case ssa.TypeI64:
if v == 0 {
m.InsertMove(vr, xzrVReg, ssa.TypeI64)
} else {
m.lowerConstantI64(vr, int64(v))
}
default:
panic("TODO")
}
}
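// maskToTypeBitsExample is an illustrative sketch, not part of the original file
// (the helper name is hypothetical): it isolates the bit-clearing step above.
// For ssa.TypeI32 and a sign-extended v = 0xffff_ffff_8000_0000, the mask
// (1<<32)-1 reduces v to 0x8000_0000 before the 32-bit constant is lowered.
func maskToTypeBitsExample(v uint64, valType ssa.Type) uint64 {
if valType.Bits() < 64 {
v = v & ((1 << valType.Bits()) - 1)
}
return v
}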
// The following logic is based on the old asm/arm64 package.
// https://github.com/tetratelabs/wazero/blob/39f2ff23a6d609e10c82b9cc0b981f6de5b87a9c/internal/asm/arm64/impl.go
func (m *machine) lowerConstantI32(dst regalloc.VReg, c int32) {
// Following the logic here:
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637
ic := int64(uint32(c))
if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
if isBitMaskImmediate(uint64(c), false) {
m.lowerConstViaBitMaskImmediate(uint64(uint32(c)), dst, false)
return
}
}
if t := const16bitAligned(int64(uint32(c))); t >= 0 {
// If the const fits in a single 16-bit-aligned chunk, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
// we can load it with a single MOVZ.
m.insertMOVZ(dst, uint64(uint32(c)>>(16*t)), t, false)
} else if t := const16bitAligned(int64(^c)); t >= 0 {
// Also, if the inverse of the const fits in a single 16-bit-aligned chunk, we can load it with a single MOVN.
m.insertMOVN(dst, uint64(^c>>(16*t)), t, false)
} else if isBitMaskImmediate(uint64(uint32(c)), false) {
m.lowerConstViaBitMaskImmediate(uint64(c), dst, false)
} else {
// Otherwise, we use MOVZ and MOVK to load it.
c16 := uint16(c)
m.insertMOVZ(dst, uint64(c16), 0, false)
c16 = uint16(uint32(c) >> 16)
m.insertMOVK(dst, uint64(c16), 1, false)
}
}
func (m *machine) lowerConstantI64(dst regalloc.VReg, c int64) {
// Following the logic here:
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852
if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
if isBitMaskImmediate(uint64(c), true) {
m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
return
}
}
if t := const16bitAligned(c); t >= 0 {
// If the const fits in a single 16-bit-aligned chunk, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
// we can load it with a single MOVZ.
m.insertMOVZ(dst, uint64(c)>>(16*t), t, true)
} else if t := const16bitAligned(^c); t >= 0 {
// Also, if the inverse of the const fits in a single 16-bit-aligned chunk, we can load it with a single MOVN.
m.insertMOVN(dst, uint64(^c)>>(16*t), t, true)
} else if isBitMaskImmediate(uint64(c), true) {
m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
} else {
m.load64bitConst(c, dst)
}
}
func (m *machine) lowerConstViaBitMaskImmediate(c uint64, dst regalloc.VReg, b64 bool) {
instr := m.allocateInstr()
instr.asALUBitmaskImm(aluOpOrr, dst, xzrVReg, c, b64)
m.insert(instr)
}
// isBitMaskImmediate determines if the value can be encoded as "bitmask immediate".
//
// Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits.
// Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits.
//
// See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate-
func isBitMaskImmediate(x uint64, _64 bool) bool {
// All zeros and ones are not "bitmask immediate" by definition.
if x == 0 || (_64 && x == 0xffff_ffff_ffff_ffff) || (!_64 && x == 0xffff_ffff) {
return false
}
switch {
case x != x>>32|x<<32:
// e = 64
case x != x>>16|x<<48:
// e = 32 (x == x>>32|x<<32).
// e.g. 0x00ff_ff00_00ff_ff00
x = uint64(int32(x))
case x != x>>8|x<<56:
// e = 16 (x == x>>16|x<<48).
// e.g. 0x00ff_00ff_00ff_00ff
x = uint64(int16(x))
case x != x>>4|x<<60:
// e = 8 (x == x>>8|x<<56).
// e.g. 0x0f0f_0f0f_0f0f_0f0f
x = uint64(int8(x))
default:
// e = 4 or 2.
return true
}
return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
}
// sequenceOfSetbits returns true if the number's binary representation is a single contiguous sequence of set bits (1s).
// For example: 0b1110 -> true, 0b1010 -> false
func sequenceOfSetbits(x uint64) bool {
y := getLowestBit(x)
// If x is a contiguous sequence of set bits, this results in a number
// with only one set bit (i.e. a power of two).
y += x
return (y-1)&y == 0
}
func getLowestBit(x uint64) uint64 {
return x & (^x + 1)
}
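// isContiguousOnesExample is an illustrative sketch, not part of the original file
// (the helper name is hypothetical): adding the lowest set bit to a contiguous run
// of ones carries through the whole run and leaves a single bit, i.e. a power of
// two. 0b0111_0000 + 0b0001_0000 = 0b1000_0000 (a power of two), whereas
// 0b0101_0000 + 0b0001_0000 = 0b0110_0000 is not.
func isContiguousOnesExample() (bool, bool) {
return sequenceOfSetbits(0b0111_0000), sequenceOfSetbits(0b0101_0000) // true, false
}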
// const16bitAligned checks whether the value consists of a single 16-bit chunk placed at a 16-bit-aligned position.
// If so, it returns the bit-shift amount divided by 16; otherwise it returns -1.
func const16bitAligned(v int64) (ret int) {
ret = -1
for s := 0; s < 64; s += 16 {
if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
ret = s / 16
break
}
}
return
}
// load64bitConst loads a 64-bit constant into the register, following the same logic to decide how to load large 64-bit
// consts as in the Go assembler.
//
// See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759
func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {
var bits [4]uint64
var zeros, negs int
for i := 0; i < 4; i++ {
bits[i] = uint64(c) >> uint(i*16) & 0xffff
if v := bits[i]; v == 0 {
zeros++
} else if v == 0xffff {
negs++
}
}
if zeros == 3 {
// one MOVZ instruction.
for i, v := range bits {
if v != 0 {
m.insertMOVZ(dst, v, i, true)
}
}
} else if negs == 3 {
// one MOVN instruction.
for i, v := range bits {
if v != 0xffff {
v = ^v
m.insertMOVN(dst, v, i, true)
}
}
} else if zeros == 2 {
// one MOVZ then one MOVK.
var movz bool
for i, v := range bits {
if !movz && v != 0 { // MOVZ.
m.insertMOVZ(dst, v, i, true)
movz = true
} else if v != 0 {
m.insertMOVK(dst, v, i, true)
}
}
} else if negs == 2 {
// one MOVN then one MOVK.
var movn bool
for i, v := range bits { // Emit MOVN.
if !movn && v != 0xffff {
v = ^v
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
m.insertMOVN(dst, v, i, true)
movn = true
} else if v != 0xffff {
m.insertMOVK(dst, v, i, true)
}
}
} else if zeros == 1 {
// one MOVZ then two MOVK.
var movz bool
for i, v := range bits {
if !movz && v != 0 { // MOVZ.
m.insertMOVZ(dst, v, i, true)
movz = true
} else if v != 0 {
m.insertMOVK(dst, v, i, true)
}
}
} else if negs == 1 {
// one MOVN then two MOVK.
var movn bool
for i, v := range bits { // Emit MOVN.
if !movn && v != 0xffff {
v = ^v
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
m.insertMOVN(dst, v, i, true)
movn = true
} else if v != 0xffff {
m.insertMOVK(dst, v, i, true)
}
}
} else {
// one MOVZ then up to three MOVK.
var movz bool
for i, v := range bits {
if !movz && v != 0 { // MOVZ.
m.insertMOVZ(dst, v, i, true)
movz = true
} else if v != 0 {
m.insertMOVK(dst, v, i, true)
}
}
}
}
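// splitInto16bitChunksExample is an illustrative sketch, not part of the original
// file (the helper name is hypothetical): it reproduces the chunking step above.
// For c = 0x0000_1234_0000_5678 it yields [0x5678, 0x0000, 0x1234, 0x0000] with
// zeros == 2, so load64bitConst emits MOVZ #0x5678 (shift 0) followed by
// MOVK #0x1234 (shift 2, i.e. LSL #32). For c = -2, negs == 3 and a single MOVN suffices.
func splitInto16bitChunksExample(c int64) (chunks [4]uint64, zeros, negs int) {
for i := 0; i < 4; i++ {
chunks[i] = uint64(c) >> uint(i*16) & 0xffff
switch chunks[i] {
case 0:
zeros++
case 0xffff:
negs++
}
}
return
}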
func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
instr := m.allocateInstr()
instr.asMOVZ(dst, v, uint64(shift), dst64)
m.insert(instr)
}
func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
instr := m.allocateInstr()
instr.asMOVK(dst, v, uint64(shift), dst64)
m.insert(instr)
}
func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
instr := m.allocateInstr()
instr.asMOVN(dst, v, uint64(shift), dst64)
m.insert(instr)
}

File diff suppressed because it is too large

View File

@ -0,0 +1,350 @@
package arm64
// This file contains the logic to "find and determine operands" for instructions.
// In order to finalize the form of an operand, we might end up merging/eliminating
// the source instructions into an operand whenever possible.
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
type (
// operand represents an operand of an instruction whose type is determined by the kind.
operand struct {
kind operandKind
data, data2 uint64
}
operandKind byte
)
// Here's the list of operand kinds. We use the abbreviation of the kind name not only for these consts,
// but also for the names of the functions which return an operand of that kind.
const (
// operandKindNR represents "NormalRegister" (NR). This is literally the register without any special operation unlike others.
operandKindNR operandKind = iota
// operandKindSR represents "Shifted Register" (SR). This is a register which is shifted by a constant.
// Some of the arm64 instructions can take this kind of operand.
operandKindSR
// operandKindER represents "Extended Register" (ER). This is a register which is sign/zero-extended to a larger size.
// Some of the arm64 instructions can take this kind of operand.
operandKindER
// operandKindImm12 represents "Immediate 12" (Imm12). This is a 12-bit immediate value which can be either shifted by 12 or not.
// See asImm12 function for detail.
operandKindImm12
// operandKindShiftImm represents "Shifted Immediate" (ShiftImm) used by shift operations.
operandKindShiftImm
)
// format returns the string representation of the operand for the given size in bits, for debugging.
func (o operand) format(size byte) string {
switch o.kind {
case operandKindNR:
return formatVRegSized(o.nr(), size)
case operandKindSR:
r, amt, sop := o.sr()
return fmt.Sprintf("%s, %s #%d", formatVRegSized(r, size), sop, amt)
case operandKindER:
r, eop, _ := o.er()
return fmt.Sprintf("%s %s", formatVRegSized(r, size), eop)
case operandKindImm12:
imm12, shiftBit := o.imm12()
if shiftBit == 1 {
return fmt.Sprintf("#%#x", uint64(imm12)<<12)
} else {
return fmt.Sprintf("#%#x", imm12)
}
default:
panic(fmt.Sprintf("unknown operand kind: %d", o.kind))
}
}
// operandNR encodes the given VReg as an operand of operandKindNR.
func operandNR(r regalloc.VReg) operand {
return operand{kind: operandKindNR, data: uint64(r)}
}
// nr decodes the underlying VReg assuming the operand is of operandKindNR.
func (o operand) nr() regalloc.VReg {
return regalloc.VReg(o.data)
}
// operandER encodes the given VReg as an operand of operandKindER.
func operandER(r regalloc.VReg, eop extendOp, to byte) operand {
if to < 32 {
panic("TODO?BUG?: when we need to extend to less than 32 bits?")
}
return operand{kind: operandKindER, data: uint64(r), data2: uint64(eop)<<32 | uint64(to)}
}
// er decodes the underlying VReg, extend operation, and the target size assuming the operand is of operandKindER.
func (o operand) er() (r regalloc.VReg, eop extendOp, to byte) {
return regalloc.VReg(o.data), extendOp(o.data2>>32) & 0xff, byte(o.data2 & 0xff)
}
// operandSR encodes the given VReg as an operand of operandKindSR.
func operandSR(r regalloc.VReg, amt byte, sop shiftOp) operand {
return operand{kind: operandKindSR, data: uint64(r), data2: uint64(amt)<<32 | uint64(sop)}
}
// sr decodes the underlying VReg, shift amount, and shift operation assuming the operand is of operandKindSR.
func (o operand) sr() (r regalloc.VReg, amt byte, sop shiftOp) {
return regalloc.VReg(o.data), byte(o.data2>>32) & 0xff, shiftOp(o.data2) & 0xff
}
// operandImm12 encodes the given imm12 as an operand of operandKindImm12.
func operandImm12(imm12 uint16, shiftBit byte) operand {
return operand{kind: operandKindImm12, data: uint64(imm12) | uint64(shiftBit)<<32}
}
// imm12 decodes the underlying imm12 data assuming the operand is of operandKindImm12.
func (o operand) imm12() (v uint16, shiftBit byte) {
return uint16(o.data), byte(o.data >> 32)
}
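// operandImm12RoundTripExample is an illustrative sketch, not part of the original
// file (the helper name is hypothetical): the 12-bit immediate and its shift flag
// are packed into a single data word, and the encode/decode pair above round-trips.
func operandImm12RoundTripExample() bool {
o := operandImm12(0x123, 1) // represents the immediate 0x123 shifted left by 12
v, shiftBit := o.imm12()
return v == 0x123 && shiftBit == 1 // true
}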
// operandShiftImm encodes the given amount as an operand of operandKindShiftImm.
func operandShiftImm(amount byte) operand {
return operand{kind: operandKindShiftImm, data: uint64(amount)}
}
// shiftImm decodes the underlying shift amount data assuming the operand is of operandKindShiftImm.
func (o operand) shiftImm() byte {
return byte(o.data)
}
// reg returns the register of the operand if applicable.
func (o operand) reg() regalloc.VReg {
switch o.kind {
case operandKindNR:
return o.nr()
case operandKindSR:
r, _, _ := o.sr()
return r
case operandKindER:
r, _, _ := o.er()
return r
case operandKindImm12:
// Does not have a register.
case operandKindShiftImm:
// Does not have a register.
default:
panic(o.kind)
}
return regalloc.VRegInvalid
}
func (o operand) realReg() regalloc.RealReg {
return o.nr().RealReg()
}
func (o operand) assignReg(v regalloc.VReg) operand {
switch o.kind {
case operandKindNR:
return operandNR(v)
case operandKindSR:
_, amt, sop := o.sr()
return operandSR(v, amt, sop)
case operandKindER:
_, eop, to := o.er()
return operandER(v, eop, to)
case operandKindImm12:
// Does not have a register.
case operandKindShiftImm:
// Does not have a register.
}
panic(o.kind)
}
// getOperand_Imm12_ER_SR_NR returns an operand of operandKindImm12, operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def`).
//
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
// If the operand can be expressed as operandKindImm12, `mode` is ignored.
func (m *machine) getOperand_Imm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
if def.IsFromBlockParam() {
return operandNR(def.BlkParamVReg)
}
instr := def.Instr
if instr.Opcode() == ssa.OpcodeIconst {
if imm12Op, ok := asImm12Operand(instr.ConstantVal()); ok {
instr.MarkLowered()
return imm12Op
}
}
return m.getOperand_ER_SR_NR(def, mode)
}
// getOperand_MaybeNegatedImm12_ER_SR_NR is almost the same as getOperand_Imm12_ER_SR_NR, but this might negate the immediate value.
// If the immediate value is negated, the second return value is true; otherwise it is false.
func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) {
if def.IsFromBlockParam() {
return operandNR(def.BlkParamVReg), false
}
instr := def.Instr
if instr.Opcode() == ssa.OpcodeIconst {
c := instr.ConstantVal()
if imm12Op, ok := asImm12Operand(c); ok {
instr.MarkLowered()
return imm12Op, false
}
signExtended := int64(c)
if def.SSAValue().Type().Bits() == 32 {
signExtended = (signExtended << 32) >> 32
}
negatedWithoutSign := -signExtended
if imm12Op, ok := asImm12Operand(uint64(negatedWithoutSign)); ok {
instr.MarkLowered()
return imm12Op, true
}
}
return m.getOperand_ER_SR_NR(def, mode), false
}
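// exampleNegatedImm12 is an illustrative sketch, not part of the original file: it walks through the
// arithmetic above with a concrete value. A 32-bit constant -8 (stored as 0xfffffff8) does not fit in
// imm12 directly, but after sign-extension its negation (+8) does, so the caller can use the negated
// form (typically by flipping an add into a sub).
func exampleNegatedImm12() {
	c := uint64(0xfffffff8) // 32-bit constant -8 as seen in the SSA constant value.
	if _, _, ok := asImm12(c); ok {
		panic("unreachable: 0xfffffff8 is not encodable as imm12")
	}
	signExtended := (int64(c) << 32) >> 32 // sign-extend the lower 32 bits: -8.
	if v, shiftBit, ok := asImm12(uint64(-signExtended)); !ok || v != 8 || shiftBit != 0 {
		panic("unreachable: +8 is encodable without a shift")
	}
}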
// getOperand_ER_SR_NR returns an operand of either operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def`).
//
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
if def.IsFromBlockParam() {
return operandNR(def.BlkParamVReg)
}
if m.compiler.MatchInstr(def, ssa.OpcodeSExtend) || m.compiler.MatchInstr(def, ssa.OpcodeUExtend) {
extInstr := def.Instr
signed := extInstr.Opcode() == ssa.OpcodeSExtend
innerExtFromBits, innerExtToBits := extInstr.ExtendFromToBits()
modeBits, modeSigned := mode.bits(), mode.signed()
if mode == extModeNone || innerExtToBits == modeBits {
eop := extendOpFrom(signed, innerExtFromBits)
extArg := m.getOperand_NR(m.compiler.ValueDefinition(extInstr.Arg()), extModeNone)
op = operandER(extArg.nr(), eop, innerExtToBits)
extInstr.MarkLowered()
return
}
if innerExtToBits > modeBits {
panic("BUG?TODO?: need the results of inner extension to be larger than the mode")
}
switch {
case (!signed && !modeSigned) || (signed && modeSigned):
// Two sign/zero extensions are equivalent to one sign/zero extension for the larger size.
eop := extendOpFrom(modeSigned, innerExtFromBits)
op = operandER(m.compiler.VRegOf(extInstr.Arg()), eop, modeBits)
extInstr.MarkLowered()
case (signed && !modeSigned) || (!signed && modeSigned):
// We need to {sign, zero}-extend the result of the {zero,sign} extension.
eop := extendOpFrom(modeSigned, innerExtToBits)
op = operandER(m.compiler.VRegOf(extInstr.Return()), eop, modeBits)
// Note that we failed to merge the inner extension instruction in this case.
}
return
}
return m.getOperand_SR_NR(def, mode)
}
// getOperand_SR_NR returns an operand of either operandKindSR or operandKindNR from the given value (defined by `def`).
//
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
func (m *machine) getOperand_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
if def.IsFromBlockParam() {
return operandNR(def.BlkParamVReg)
}
if m.compiler.MatchInstr(def, ssa.OpcodeIshl) {
// Check if the shift amount is defined by a constant instruction.
targetVal, amountVal := def.Instr.Arg2()
targetVReg := m.getOperand_NR(m.compiler.ValueDefinition(targetVal), extModeNone).nr()
amountDef := m.compiler.ValueDefinition(amountVal)
if amountDef.IsFromInstr() && amountDef.Instr.Constant() {
// If that is the case, we can use the shifted register operand (SR).
c := byte(amountDef.Instr.ConstantVal()) & (targetVal.Type().Bits() - 1) // Clears the unnecessary bits.
def.Instr.MarkLowered()
amountDef.Instr.MarkLowered()
return operandSR(targetVReg, c, shiftOpLSL)
}
}
return m.getOperand_NR(def, mode)
}
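// exampleShiftAmountMask is an illustrative sketch, not part of the original file: the constant shift
// amount above is masked by bits-1, so shifting a 64-bit value by 67 lowers to a shifted-register
// operand with amount 3.
func exampleShiftAmountMask() {
	if amount := byte(67) & (64 - 1); amount != 3 {
		panic("unreachable")
	}
}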
// getOperand_ShiftImm_NR returns an operand of either operandKindShiftImm or operandKindNR from the given value (defined by `def`).
func (m *machine) getOperand_ShiftImm_NR(def *backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) {
if def.IsFromBlockParam() {
return operandNR(def.BlkParamVReg)
}
instr := def.Instr
if instr.Constant() {
amount := byte(instr.ConstantVal()) & (shiftBitWidth - 1) // Clears the unnecessary bits.
return operandShiftImm(amount)
}
return m.getOperand_NR(def, mode)
}
// getOperand_NR returns an operand of operandKindNR from the given value (defined by `def`).
//
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
func (m *machine) getOperand_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
var v regalloc.VReg
if def.IsFromBlockParam() {
v = def.BlkParamVReg
} else {
instr := def.Instr
if instr.Constant() {
// We inline all the constant instructions so that we can reduce register usage.
v = m.lowerConstant(instr)
instr.MarkLowered()
} else {
if n := def.N; n == 0 {
v = m.compiler.VRegOf(instr.Return())
} else {
_, rs := instr.Returns()
v = m.compiler.VRegOf(rs[n-1])
}
}
}
r := v
switch inBits := def.SSAValue().Type().Bits(); {
case mode == extModeNone:
case inBits == 32 && (mode == extModeZeroExtend32 || mode == extModeSignExtend32):
case inBits == 32 && mode == extModeZeroExtend64:
extended := m.compiler.AllocateVReg(ssa.TypeI64)
ext := m.allocateInstr()
ext.asExtend(extended, v, 32, 64, false)
m.insert(ext)
r = extended
case inBits == 32 && mode == extModeSignExtend64:
extended := m.compiler.AllocateVReg(ssa.TypeI64)
ext := m.allocateInstr()
ext.asExtend(extended, v, 32, 64, true)
m.insert(ext)
r = extended
case inBits == 64 && (mode == extModeZeroExtend64 || mode == extModeSignExtend64):
}
return operandNR(r)
}
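// exampleExtendMode is an illustrative sketch, not part of the original file: it shows the difference
// the extension mode makes for a 32-bit value with the sign bit set, which is why getOperand_NR emits
// an explicit extend instruction when the mode requires 64 bits.
func exampleExtendMode() {
	v := uint32(0xfffffff0)
	if uint64(v) != 0x00000000fffffff0 { // extModeZeroExtend64: upper bits become zero.
		panic("unreachable")
	}
	if uint64(int64(int32(v))) != 0xfffffffffffffff0 { // extModeSignExtend64: upper bits replicate the sign.
		panic("unreachable")
	}
}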
func asImm12Operand(val uint64) (op operand, ok bool) {
v, shiftBit, ok := asImm12(val)
if !ok {
return operand{}, false
}
return operandImm12(v, shiftBit), true
}
func asImm12(val uint64) (v uint16, shiftBit byte, ok bool) {
const mask1, mask2 uint64 = 0xfff, 0xfff_000
if val&^mask1 == 0 {
return uint16(val), 0, true
} else if val&^mask2 == 0 {
return uint16(val >> 12), 1, true
} else {
return 0, 0, false
}
}
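// exampleAsImm12 is an illustrative sketch, not part of the original file: 0xfff is encodable
// unshifted, 0xfff000 is encodable with the 12-bit shift, and 0x1001 straddles both masks and is not
// encodable at all.
func exampleAsImm12() {
	if v, shiftBit, ok := asImm12(0xfff); !ok || v != 0xfff || shiftBit != 0 {
		panic("unreachable")
	}
	if v, shiftBit, ok := asImm12(0xfff000); !ok || v != 0xfff || shiftBit != 1 {
		panic("unreachable")
	}
	if _, _, ok := asImm12(0x1001); ok {
		panic("unreachable")
	}
}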


@ -0,0 +1,440 @@
package arm64
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
type (
// addressMode represents an ARM64 addressing mode.
//
// https://developer.arm.com/documentation/102374/0101/Loads-and-stores---addressing
// TODO: use the bit-packed layout like operand struct.
addressMode struct {
kind addressModeKind
rn, rm regalloc.VReg
extOp extendOp
imm int64
}
// addressModeKind represents the kind of ARM64 addressing mode.
addressModeKind byte
)
const (
// addressModeKindRegExtended takes a base register and an index register. The index register is sign/zero-extended,
// and then scaled by bits(type)/8.
//
// e.g.
// - ldrh w1, [x2, w3, SXTW #1] ;; sign-extended and scaled by 2 (== LSL #1)
// - strh w1, [x2, w3, UXTW #1] ;; zero-extended and scaled by 2 (== LSL #1)
// - ldr w1, [x2, w3, SXTW #2] ;; sign-extended and scaled by 4 (== LSL #2)
// - str x1, [x2, w3, UXTW #3] ;; zero-extended and scaled by 8 (== LSL #3)
//
// See the following pages:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--register---Load-Register-Halfword--register--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--
addressModeKindRegScaledExtended addressModeKind = iota
// addressModeKindRegScaled is the same as addressModeKindRegScaledExtended, but without the extension.
addressModeKindRegScaled
// addressModeKindRegExtended is the same as addressModeKindRegScaledExtended, but without the scale factor.
addressModeKindRegExtended
// addressModeKindRegReg takes a base register and an index register. The index register is neither scaled nor extended.
addressModeKindRegReg
// addressModeKindRegSignedImm9 takes a base register and a 9-bit "signed" immediate offset (-256 to 255).
// The immediate will be sign-extended, and be added to the base register.
// This is a.k.a. "unscaled" since the immediate is not scaled.
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--
addressModeKindRegSignedImm9
// addressModeKindRegUnsignedImm12 takes a base register and a 12-bit "unsigned" immediate offset. scaled by
// the size of the type. In other words, the actual offset will be imm12 * bits(type)/8.
// See "Unsigned offset" in the following pages:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
addressModeKindRegUnsignedImm12
// addressModeKindPostIndex takes a base register and a 9-bit "signed" immediate offset.
// After the load/store, the base register will be updated by the offset.
//
// Note that when this is used for pair load/store, the offset will be 7-bit "signed" immediate offset.
//
// See "Post-index" in the following pages for examples:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-
addressModeKindPostIndex
// addressModeKindPreIndex takes a base register and a 9-bit "signed" immediate offset.
// Before the load/store, the base register will be updated by the offset.
//
// Note that when this is used for pair load/store, the offset will be 7-bit "signed" immediate offset.
//
// See "Pre-index" in the following pages for examples:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-
addressModeKindPreIndex
// addressModeKindArgStackSpace is used to resolve the address of the argument stack space
// existing right above the stack pointer. Since we don't know the exact stack space needed for a function
// at this phase of compilation, this is used as a placeholder and further lowered to a real addressing mode like above.
addressModeKindArgStackSpace
// addressModeKindResultStackSpace is used to resolve the address of the result stack space
// existing right above the stack pointer. Since we don't know the exact stack space needed for a function
// at this phase of compilation, this is used as a placeholder and further lowered to a real addressing mode like above.
addressModeKindResultStackSpace
)
func (a addressMode) format(dstSizeBits byte) (ret string) {
base := formatVRegSized(a.rn, 64)
if rn := a.rn; rn.RegType() != regalloc.RegTypeInt {
panic("invalid base register type: " + a.rn.RegType().String())
} else if rn.IsRealReg() && v0 <= a.rn.RealReg() && a.rn.RealReg() <= v30 {
panic("BUG: likely a bug in reg alloc or reset behavior")
}
switch a.kind {
case addressModeKindRegScaledExtended:
amount := a.sizeInBitsToShiftAmount(dstSizeBits)
ret = fmt.Sprintf("[%s, %s, %s #%#x]", base, formatVRegSized(a.rm, a.indexRegBits()), a.extOp, amount)
case addressModeKindRegScaled:
amount := a.sizeInBitsToShiftAmount(dstSizeBits)
ret = fmt.Sprintf("[%s, %s, lsl #%#x]", base, formatVRegSized(a.rm, a.indexRegBits()), amount)
case addressModeKindRegExtended:
ret = fmt.Sprintf("[%s, %s, %s]", base, formatVRegSized(a.rm, a.indexRegBits()), a.extOp)
case addressModeKindRegReg:
ret = fmt.Sprintf("[%s, %s]", base, formatVRegSized(a.rm, a.indexRegBits()))
case addressModeKindRegSignedImm9:
if a.imm != 0 {
ret = fmt.Sprintf("[%s, #%#x]", base, a.imm)
} else {
ret = fmt.Sprintf("[%s]", base)
}
case addressModeKindRegUnsignedImm12:
if a.imm != 0 {
ret = fmt.Sprintf("[%s, #%#x]", base, a.imm)
} else {
ret = fmt.Sprintf("[%s]", base)
}
case addressModeKindPostIndex:
ret = fmt.Sprintf("[%s], #%#x", base, a.imm)
case addressModeKindPreIndex:
ret = fmt.Sprintf("[%s, #%#x]!", base, a.imm)
case addressModeKindArgStackSpace:
ret = fmt.Sprintf("[#arg_space, #%#x]", a.imm)
case addressModeKindResultStackSpace:
ret = fmt.Sprintf("[#ret_space, #%#x]", a.imm)
}
return
}
func addressModePreOrPostIndex(rn regalloc.VReg, imm int64, preIndex bool) addressMode {
if !offsetFitsInAddressModeKindRegSignedImm9(imm) {
panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm))
}
if preIndex {
return addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
} else {
return addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
}
}
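// exampleAddressModePreOrPostIndex is an illustrative sketch, not part of the original file: the
// prologue pushes a 16-byte pair with a pre-index store ([sp, #-16]!) and the epilogue pops it with a
// post-index load ([sp], #16); both offsets fit the signed 9-bit range, so neither call panics.
func exampleAddressModePreOrPostIndex() (push, pop addressMode) {
	push = addressModePreOrPostIndex(spVReg, -16, true) // update SP before the access.
	pop = addressModePreOrPostIndex(spVReg, 16, false)  // update SP after the access.
	return
}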
func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool {
divisor := int64(dstSizeInBits) / 8
return 0 < offset && offset%divisor == 0 && offset/divisor < 4096
}
func offsetFitsInAddressModeKindRegSignedImm9(offset int64) bool {
return -256 <= offset && offset <= 255
}
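// exampleOffsetFits is an illustrative sketch, not part of the original file: for a 64-bit access the
// unsigned imm12 mode accepts multiples of 8 up to 8*4095 = 32760, while misaligned or out-of-range
// offsets must fit the signed 9-bit range (-256..255) or be materialized into a register.
func exampleOffsetFits() {
	if !offsetFitsInAddressModeKindRegUnsignedImm12(64, 32760) {
		panic("unreachable: 8*4095 is encodable")
	}
	if offsetFitsInAddressModeKindRegUnsignedImm12(64, 12) { // not a multiple of 8.
		panic("unreachable")
	}
	if !offsetFitsInAddressModeKindRegSignedImm9(-256) || offsetFitsInAddressModeKindRegSignedImm9(256) {
		panic("unreachable")
	}
}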
func (a addressMode) indexRegBits() byte {
bits := a.extOp.srcBits()
if bits != 32 && bits != 64 {
panic("invalid index register for address mode. it must be either 32 or 64 bits")
}
return bits
}
func (a addressMode) sizeInBitsToShiftAmount(sizeInBits byte) (lsl byte) {
switch sizeInBits {
case 8:
lsl = 0
case 16:
lsl = 1
case 32:
lsl = 2
case 64:
lsl = 3
}
return
}
func extLoadSignSize(op ssa.Opcode) (size byte, signed bool) {
switch op {
case ssa.OpcodeUload8:
size, signed = 8, false
case ssa.OpcodeUload16:
size, signed = 16, false
case ssa.OpcodeUload32:
size, signed = 32, false
case ssa.OpcodeSload8:
size, signed = 8, true
case ssa.OpcodeSload16:
size, signed = 16, true
case ssa.OpcodeSload32:
size, signed = 32, true
default:
panic("BUG")
}
return
}
func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret regalloc.VReg) {
size, signed := extLoadSignSize(op)
amode := m.lowerToAddressMode(ptr, offset, size)
load := m.allocateInstr()
if signed {
load.asSLoad(operandNR(ret), amode, size)
} else {
load.asULoad(operandNR(ret), amode, size)
}
m.insert(load)
}
func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa.Value) {
amode := m.lowerToAddressMode(ptr, offset, typ.Bits())
dst := m.compiler.VRegOf(ret)
load := m.allocateInstr()
switch typ {
case ssa.TypeI32, ssa.TypeI64:
load.asULoad(operandNR(dst), amode, typ.Bits())
case ssa.TypeF32, ssa.TypeF64:
load.asFpuLoad(operandNR(dst), amode, typ.Bits())
case ssa.TypeV128:
load.asFpuLoad(operandNR(dst), amode, 128)
default:
panic("TODO")
}
m.insert(load)
}
func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane, ret ssa.Value) {
// vecLoad1R has an offset addressing mode (base+imm) only for post-index, so we simply add the offset to the base.
base := m.getOperand_NR(m.compiler.ValueDefinition(ptr), extModeNone).nr()
offsetReg := m.compiler.AllocateVReg(ssa.TypeI64)
m.lowerConstantI64(offsetReg, int64(offset))
addedBase := m.addReg64ToReg64(base, offsetReg)
rd := operandNR(m.compiler.VRegOf(ret))
ld1r := m.allocateInstr()
ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane))
m.insert(ld1r)
}
func (m *machine) lowerStore(si *ssa.Instruction) {
// TODO: merge consecutive stores into a single pair store instruction.
value, ptr, offset, storeSizeInBits := si.StoreData()
amode := m.lowerToAddressMode(ptr, offset, storeSizeInBits)
valueOp := m.getOperand_NR(m.compiler.ValueDefinition(value), extModeNone)
store := m.allocateInstr()
store.asStore(valueOp, amode, storeSizeInBits)
m.insert(store)
}
// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode addressMode) {
// TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and
// addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed
// to support more efficient address resolution.
a32s, a64s, offset := m.collectAddends(ptr)
offset += int64(offsetBase)
return m.lowerToAddressModeFromAddends(a32s, a64s, size, offset)
}
// lowerToAddressModeFromAddends creates an addressMode from a list of addends collected by collectAddends.
// During the construction, this might emit additional instructions.
//
// Extracted as a separate function for easy testing.
func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode addressMode) {
switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); {
case a64sExist && a32sExist:
var base regalloc.VReg
base = a64s.Dequeue()
var a32 addend32
a32 = a32s.Dequeue()
amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset):
var base regalloc.VReg
base = a64s.Dequeue()
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
offset = 0
case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset):
var base regalloc.VReg
base = a64s.Dequeue()
amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
offset = 0
case a64sExist:
var base regalloc.VReg
base = a64s.Dequeue()
if !a64s.Empty() {
index := a64s.Dequeue()
amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
} else {
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
}
case a32sExist:
base32 := a32s.Dequeue()
// First we need a 64-bit base register.
base := m.compiler.AllocateVReg(ssa.TypeI64)
baseExt := m.allocateInstr()
var signed bool
if base32.ext == extendOpSXTW {
signed = true
}
baseExt.asExtend(base, base32.r, 32, 64, signed)
m.insert(baseExt)
if !a32s.Empty() {
index := a32s.Dequeue()
amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
} else {
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
}
default: // Only static offsets.
tmpReg := m.compiler.AllocateVReg(ssa.TypeI64)
m.lowerConstantI64(tmpReg, offset)
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0}
offset = 0
}
baseReg := amode.rn
if offset > 0 {
baseReg = m.addConstToReg64(baseReg, offset) // baseReg += offset
}
for !a64s.Empty() {
a64 := a64s.Dequeue()
baseReg = m.addReg64ToReg64(baseReg, a64) // baseReg += a64
}
for !a32s.Empty() {
a32 := a32s.Dequeue()
baseReg = m.addRegToReg64Ext(baseReg, a32.r, a32.ext) // baseReg += (a32 extended to 64-bit)
}
amode.rn = baseReg
return
}
var addendsMatchOpcodes = [4]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst}
func (m *machine) collectAddends(ptr ssa.Value) (addends32 *wazevoapi.Queue[addend32], addends64 *wazevoapi.Queue[regalloc.VReg], offset int64) {
m.addendsWorkQueue.Reset()
m.addends32.Reset()
m.addends64.Reset()
m.addendsWorkQueue.Enqueue(ptr)
for !m.addendsWorkQueue.Empty() {
v := m.addendsWorkQueue.Dequeue()
def := m.compiler.ValueDefinition(v)
switch op := m.compiler.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op {
case ssa.OpcodeIadd:
// If the addend is an add, we recursively collect its operands.
x, y := def.Instr.Arg2()
m.addendsWorkQueue.Enqueue(x)
m.addendsWorkQueue.Enqueue(y)
def.Instr.MarkLowered()
case ssa.OpcodeIconst:
// If the addend is constant, we just statically merge it into the offset.
ic := def.Instr
u64 := ic.ConstantVal()
if ic.Return().Type().Bits() == 32 {
offset += int64(int32(u64)) // sign-extend.
} else {
offset += int64(u64)
}
def.Instr.MarkLowered()
case ssa.OpcodeUExtend, ssa.OpcodeSExtend:
input := def.Instr.Arg()
if input.Type().Bits() != 32 {
panic("illegal size: " + input.Type().String())
}
var ext extendOp
if op == ssa.OpcodeUExtend {
ext = extendOpUXTW
} else {
ext = extendOpSXTW
}
inputDef := m.compiler.ValueDefinition(input)
constInst := inputDef.IsFromInstr() && inputDef.Instr.Constant()
switch {
case constInst && ext == extendOpUXTW:
// Zero-extension of a 32-bit constant can be merged into the offset.
offset += int64(uint32(inputDef.Instr.ConstantVal()))
case constInst && ext == extendOpSXTW:
// Sign-extension of a 32-bit constant can be merged into the offset.
offset += int64(int32(inputDef.Instr.ConstantVal())) // sign-extend!
default:
m.addends32.Enqueue(addend32{r: m.getOperand_NR(inputDef, extModeNone).nr(), ext: ext})
}
def.Instr.MarkLowered()
continue
default:
// If the addend is not one of them, we simply use it as-is (without merging!), optionally zero-extending it.
m.addends64.Enqueue(m.getOperand_NR(def, extModeZeroExtend64 /* optional zero ext */).nr())
}
}
return &m.addends32, &m.addends64, offset
}
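// exampleAddendConstantMerge is an illustrative sketch, not part of the original file: it shows the
// sign/zero-extension rules used above when a constant addend is folded into the static offset.
func exampleAddendConstantMerge() {
	u64 := uint64(0xffffffff)
	if int64(int32(u64)) != -1 { // a 32-bit iconst is sign-extended into the offset.
		panic("unreachable")
	}
	if int64(uint32(u64)) != 0xffffffff { // a UExtend-ed 32-bit constant is zero-extended instead.
		panic("unreachable")
	}
}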
func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
rd = m.compiler.AllocateVReg(ssa.TypeI64)
alu := m.allocateInstr()
if imm12Op, ok := asImm12Operand(uint64(c)); ok {
alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), imm12Op, true)
} else if imm12Op, ok = asImm12Operand(uint64(-c)); ok {
alu.asALU(aluOpSub, operandNR(rd), operandNR(r), imm12Op, true)
} else {
tmp := m.compiler.AllocateVReg(ssa.TypeI64)
m.load64bitConst(c, tmp)
alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), operandNR(tmp), true)
}
m.insert(alu)
return
}
func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {
rd = m.compiler.AllocateVReg(ssa.TypeI64)
alu := m.allocateInstr()
alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true)
m.insert(alu)
return
}
func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) {
rd = m.compiler.AllocateVReg(ssa.TypeI64)
alu := m.allocateInstr()
alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandER(rm, ext, 64), true)
m.insert(alu)
return
}


@ -0,0 +1,515 @@
package arm64
import (
"context"
"fmt"
"strings"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
type (
// machine implements backend.Machine.
machine struct {
compiler backend.Compiler
executableContext *backend.ExecutableContextT[instruction]
currentABI *backend.FunctionABI
regAlloc regalloc.Allocator
regAllocFn *backend.RegAllocFunction[*instruction, *machine]
// addendsWorkQueue is used during address lowering, defined here for reuse.
addendsWorkQueue wazevoapi.Queue[ssa.Value]
addends32 wazevoapi.Queue[addend32]
// addends64 is used during address lowering, defined here for reuse.
addends64 wazevoapi.Queue[regalloc.VReg]
unresolvedAddressModes []*instruction
// condBrRelocs holds the conditional branches which need offset relocation.
condBrRelocs []condBrReloc
// jmpTableTargets holds the labels of the jump table targets.
jmpTableTargets [][]uint32
// spillSlotSize is the size of the stack slot in bytes used for spilling registers.
// During the execution of the function, the stack looks like:
//
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | xxxxx |
// | ReturnAddress |
// +-----------------+ <<-|
// | ........... | |
// | spill slot M | | <--- spillSlotSize
// | ............ | |
// | spill slot 2 | |
// | spill slot 1 | <<-+
// | clobbered N |
// | ........... |
// | clobbered 1 |
// | clobbered 0 |
// SP---> +-----------------+
// (low address)
//
// and it represents the size of the space between FP and the first spilled slot. This must be a multiple of 16.
// Also note that this is only known after register allocation.
spillSlotSize int64
spillSlots map[regalloc.VRegID]int64 // regalloc.VRegID to offset.
// clobberedRegs holds real-register backed VRegs saved at the function prologue, and restored at the epilogue.
clobberedRegs []regalloc.VReg
maxRequiredStackSizeForCalls int64
stackBoundsCheckDisabled bool
regAllocStarted bool
}
addend32 struct {
r regalloc.VReg
ext extendOp
}
condBrReloc struct {
cbr *instruction
// currentLabelPos is the labelPosition within which condBr is defined.
currentLabelPos *labelPosition
// nextLabel is the label of the next block.
nextLabel label
offset int64
}
labelPosition = backend.LabelPosition[instruction]
label = backend.Label
)
const (
labelReturn = backend.LabelReturn
labelInvalid = backend.LabelInvalid
)
// NewBackend returns a new backend for arm64.
func NewBackend() backend.Machine {
m := &machine{
spillSlots: make(map[regalloc.VRegID]int64),
executableContext: newExecutableContext(),
regAlloc: regalloc.NewAllocator(regInfo),
}
return m
}
func newExecutableContext() *backend.ExecutableContextT[instruction] {
return backend.NewExecutableContextT[instruction](resetInstruction, setNext, setPrev, asNop0)
}
// ExecutableContext implements backend.Machine.
func (m *machine) ExecutableContext() backend.ExecutableContext {
return m.executableContext
}
// RegAlloc implements backend.Machine Function.
func (m *machine) RegAlloc() {
rf := m.regAllocFn
for _, pos := range m.executableContext.OrderedBlockLabels {
rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End)
}
m.regAllocStarted = true
m.regAlloc.DoAllocation(rf)
// Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes.
m.spillSlotSize = (m.spillSlotSize + 15) &^ 15
}
// Reset implements backend.Machine.
func (m *machine) Reset() {
m.clobberedRegs = m.clobberedRegs[:0]
for key := range m.spillSlots {
m.clobberedRegs = append(m.clobberedRegs, regalloc.VReg(key))
}
for _, key := range m.clobberedRegs {
delete(m.spillSlots, regalloc.VRegID(key))
}
m.clobberedRegs = m.clobberedRegs[:0]
m.regAllocStarted = false
m.regAlloc.Reset()
m.regAllocFn.Reset()
m.spillSlotSize = 0
m.unresolvedAddressModes = m.unresolvedAddressModes[:0]
m.maxRequiredStackSizeForCalls = 0
m.executableContext.Reset()
m.jmpTableTargets = m.jmpTableTargets[:0]
}
// SetCurrentABI implements backend.Machine SetCurrentABI.
func (m *machine) SetCurrentABI(abi *backend.FunctionABI) {
m.currentABI = abi
}
// DisableStackCheck implements backend.Machine DisableStackCheck.
func (m *machine) DisableStackCheck() {
m.stackBoundsCheckDisabled = true
}
// SetCompiler implements backend.Machine.
func (m *machine) SetCompiler(ctx backend.Compiler) {
m.compiler = ctx
m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, ctx.SSABuilder(), ctx)
}
func (m *machine) insert(i *instruction) {
ectx := m.executableContext
ectx.PendingInstructions = append(ectx.PendingInstructions, i)
}
func (m *machine) insertBrTargetLabel() label {
nop, l := m.allocateBrTarget()
m.insert(nop)
return l
}
func (m *machine) allocateBrTarget() (nop *instruction, l label) {
ectx := m.executableContext
l = ectx.AllocateLabel()
nop = m.allocateInstr()
nop.asNop0WithLabel(l)
pos := ectx.AllocateLabelPosition(l)
pos.Begin, pos.End = nop, nop
ectx.LabelPositions[l] = pos
return
}
// allocateInstr allocates an instruction.
func (m *machine) allocateInstr() *instruction {
instr := m.executableContext.InstructionPool.Allocate()
if !m.regAllocStarted {
instr.addedBeforeRegAlloc = true
}
return instr
}
func resetInstruction(i *instruction) {
*i = instruction{}
}
func (m *machine) allocateNop() *instruction {
instr := m.allocateInstr()
instr.asNop0()
return instr
}
func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) {
amode := &i.amode
switch amode.kind {
case addressModeKindResultStackSpace:
amode.imm += ret0offset
case addressModeKindArgStackSpace:
amode.imm += arg0offset
default:
panic("BUG")
}
var sizeInBits byte
switch i.kind {
case store8, uLoad8:
sizeInBits = 8
case store16, uLoad16:
sizeInBits = 16
case store32, fpuStore32, uLoad32, fpuLoad32:
sizeInBits = 32
case store64, fpuStore64, uLoad64, fpuLoad64:
sizeInBits = 64
case fpuStore128, fpuLoad128:
sizeInBits = 128
default:
panic("BUG")
}
if offsetFitsInAddressModeKindRegUnsignedImm12(sizeInBits, amode.imm) {
amode.kind = addressModeKindRegUnsignedImm12
} else {
// In this case, we load the offset into the temporary register,
// and then use it as the index register.
newPrev := m.lowerConstantI64AndInsert(i.prev, tmpRegVReg, amode.imm)
linkInstr(newPrev, i)
*amode = addressMode{kind: addressModeKindRegReg, rn: amode.rn, rm: tmpRegVReg, extOp: extendOpUXTX /* indicates rm reg is 64-bit */}
}
}
// resolveRelativeAddresses resolves the relative addresses before encoding.
func (m *machine) resolveRelativeAddresses(ctx context.Context) {
ectx := m.executableContext
for {
if len(m.unresolvedAddressModes) > 0 {
arg0offset, ret0offset := m.arg0OffsetFromSP(), m.ret0OffsetFromSP()
for _, i := range m.unresolvedAddressModes {
m.resolveAddressingMode(arg0offset, ret0offset, i)
}
}
// Reuse the slice to gather the unresolved conditional branches.
m.condBrRelocs = m.condBrRelocs[:0]
var fn string
var fnIndex int
var labelToSSABlockID map[label]ssa.BasicBlockID
if wazevoapi.PerfMapEnabled {
fn = wazevoapi.GetCurrentFunctionName(ctx)
labelToSSABlockID = make(map[label]ssa.BasicBlockID)
for i, l := range ectx.SsaBlockIDToLabels {
labelToSSABlockID[l] = ssa.BasicBlockID(i)
}
fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx)
}
// Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label.
var offset int64
for i, pos := range ectx.OrderedBlockLabels {
pos.BinaryOffset = offset
var size int64
for cur := pos.Begin; ; cur = cur.next {
switch cur.kind {
case nop0:
l := cur.nop0Label()
if pos, ok := ectx.LabelPositions[l]; ok {
pos.BinaryOffset = offset + size
}
case condBr:
if !cur.condBrOffsetResolved() {
var nextLabel label
if i < len(ectx.OrderedBlockLabels)-1 {
// Note: this is only used when the block ends with fallthrough,
// therefore it can be safely assumed that the next block exists when it's needed.
nextLabel = ectx.OrderedBlockLabels[i+1].L
}
m.condBrRelocs = append(m.condBrRelocs, condBrReloc{
cbr: cur, currentLabelPos: pos, offset: offset + size,
nextLabel: nextLabel,
})
}
}
size += cur.size()
if cur == pos.End {
break
}
}
if wazevoapi.PerfMapEnabled {
if size > 0 {
l := pos.L
var labelStr string
if blkID, ok := labelToSSABlockID[l]; ok {
labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID)
} else {
labelStr = l.String()
}
wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr))
}
}
offset += size
}
// Before resolving any offsets, we need to check if all the conditional branches can be resolved.
var needRerun bool
for i := range m.condBrRelocs {
reloc := &m.condBrRelocs[i]
cbr := reloc.cbr
offset := reloc.offset
target := cbr.condBrLabel()
offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
diff := offsetOfTarget - offset
if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
// In this case, the conditional branch target is too far away. We place the trampoline instructions at the end of the current block,
// and jump to it.
m.insertConditionalJumpTrampoline(cbr, reloc.currentLabelPos, reloc.nextLabel)
// Then, we need to rerun the offset calculation to fix up the label offsets
// as they have changed after the trampoline is inserted.
needRerun = true
}
}
if needRerun {
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.Clear()
}
} else {
break
}
}
var currentOffset int64
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
switch cur.kind {
case br:
target := cur.brLabel()
offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
diff := offsetOfTarget - currentOffset
divided := diff >> 2
if divided < minSignedInt26 || divided > maxSignedInt26 {
// This means the currently compiled single function is extremely large.
panic("too large function that requires branch relocation of large unconditional branch larger than 26-bit range")
}
cur.brOffsetResolve(diff)
case condBr:
if !cur.condBrOffsetResolved() {
target := cur.condBrLabel()
offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
diff := offsetOfTarget - currentOffset
if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly")
}
cur.condBrOffsetResolve(diff)
}
case brTableSequence:
tableIndex := cur.u1
targets := m.jmpTableTargets[tableIndex]
for i := range targets {
l := label(targets[i])
offsetOfTarget := ectx.LabelPositions[l].BinaryOffset
diff := offsetOfTarget - (currentOffset + brTableSequenceOffsetTableBegin)
targets[i] = uint32(diff)
}
cur.brTableSequenceOffsetsResolved()
case emitSourceOffsetInfo:
m.compiler.AddSourceOffsetInfo(currentOffset, cur.sourceOffsetInfo())
}
currentOffset += cur.size()
}
}
const (
maxSignedInt26 = 1<<25 - 1
minSignedInt26 = -(1 << 25)
maxSignedInt19 = 1<<18 - 1
minSignedInt19 = -(1 << 18)
)
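// exampleBranchRange is an illustrative sketch, not part of the original file: branch offsets are
// encoded as instruction counts (diff >> 2), so conditional branches reach roughly ±1MiB (19 bits) and
// unconditional branches roughly ±128MiB (26 bits). A 2MiB jump therefore needs a trampoline only in
// the conditional case.
func exampleBranchRange() {
	diff := int64(2 << 20) // a 2MiB forward jump.
	if divided := diff >> 2; divided >= minSignedInt19 && divided <= maxSignedInt19 {
		panic("unreachable: 2MiB exceeds the conditional branch range")
	}
	if divided := diff >> 2; divided < minSignedInt26 || divided > maxSignedInt26 {
		panic("unreachable: 2MiB is well within the unconditional branch range")
	}
}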
func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) {
cur := currentBlk.End
originalTarget := cbr.condBrLabel()
endNext := cur.next
if cur.kind != br {
// If the current block ends with an unconditional branch (br), we can just insert the trampoline after it.
// Otherwise, we need to insert a "skip" branch to jump over the trampoline instructions.
skip := m.allocateInstr()
skip.asBr(nextLabel)
cur = linkInstr(cur, skip)
}
cbrNewTargetInstr, cbrNewTargetLabel := m.allocateBrTarget()
cbr.setCondBrTargets(cbrNewTargetLabel)
cur = linkInstr(cur, cbrNewTargetInstr)
// Then insert the unconditional branch to the original target, which should be encodable
// since a 26-bit offset is enough for any practical application.
br := m.allocateInstr()
br.asBr(originalTarget)
cur = linkInstr(cur, br)
// Update the end of the current block.
currentBlk.End = cur
linkInstr(cur, endNext)
}
// Format implements backend.Machine.
func (m *machine) Format() string {
ectx := m.executableContext
begins := map[*instruction]label{}
for l, pos := range ectx.LabelPositions {
begins[pos.Begin] = l
}
irBlocks := map[label]ssa.BasicBlockID{}
for i, l := range ectx.SsaBlockIDToLabels {
irBlocks[l] = ssa.BasicBlockID(i)
}
var lines []string
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
if l, ok := begins[cur]; ok {
var labelStr string
if blkID, ok := irBlocks[l]; ok {
labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID)
} else {
labelStr = fmt.Sprintf("%s:", l)
}
lines = append(lines, labelStr)
}
if cur.kind == nop0 {
continue
}
lines = append(lines, "\t"+cur.String())
}
return "\n" + strings.Join(lines, "\n") + "\n"
}
// InsertReturn implements backend.Machine.
func (m *machine) InsertReturn() {
i := m.allocateInstr()
i.asRet()
m.insert(i)
}
func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 {
offset, ok := m.spillSlots[id]
if !ok {
offset = m.spillSlotSize
// TODO: this should be aligned depending on the `size` to use Imm12 offset load/store as much as possible.
m.spillSlots[id] = offset
m.spillSlotSize += int64(size)
}
return offset + 16 // spill slot starts above the clobbered registers and the frame size.
}
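// exampleSpillSlotOffsets is an illustrative sketch, not part of the original file: it replicates the
// bookkeeping above with a plain map to show that successive 8-byte spills get offsets 16, 24, 32, ...
// from SP, since 16 bytes below the spill area hold the saved frame size.
func exampleSpillSlotOffsets() {
	slots := map[int]int64{}
	var slotSize int64
	alloc := func(id int, size int64) int64 {
		offset, ok := slots[id]
		if !ok {
			offset = slotSize
			slots[id] = offset
			slotSize += size
		}
		return offset + 16
	}
	if alloc(0, 8) != 16 || alloc(1, 8) != 24 || alloc(0, 8) != 16 {
		panic("unreachable")
	}
}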
func (m *machine) clobberedRegSlotSize() int64 {
return int64(len(m.clobberedRegs) * 16)
}
func (m *machine) arg0OffsetFromSP() int64 {
return m.frameSize() +
16 + // 16-byte aligned return address
16 // frame size saved below the clobbered registers.
}
func (m *machine) ret0OffsetFromSP() int64 {
return m.arg0OffsetFromSP() + m.currentABI.ArgStackSize
}
func (m *machine) requiredStackSize() int64 {
return m.maxRequiredStackSizeForCalls +
m.frameSize() +
16 + // 16-byte aligned return address.
16 // frame size saved below the clobbered registers.
}
func (m *machine) frameSize() int64 {
s := m.clobberedRegSlotSize() + m.spillSlotSize
if s&0xf != 0 {
panic(fmt.Errorf("BUG: frame size %d is not 16-byte aligned", s))
}
return s
}
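// exampleFrameLayout is an illustrative sketch, not part of the original file: for a hypothetical
// function with two clobbered registers (2*16 = 32 bytes) and 16 bytes of spill slots, the frame size
// is 48 bytes, and arg0 sits 48 + 16 (return address slot) + 16 (saved frame size) = 80 bytes above SP,
// matching arg0OffsetFromSP above.
func exampleFrameLayout() {
	clobbered := int64(2 * 16)
	spills := int64(16)
	frame := clobbered + spills
	if arg0 := frame + 16 + 16; arg0 != 80 {
		panic("unreachable")
	}
}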
func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) {
// TODO: reuse the slice!
labels := make([]uint32, len(targets))
for j, target := range targets {
labels[j] = uint32(m.executableContext.GetOrAllocateSSABlockLabel(target))
}
index = len(m.jmpTableTargets)
m.jmpTableTargets = append(m.jmpTableTargets, labels)
return
}


@ -0,0 +1,469 @@
package arm64
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
// PostRegAlloc implements backend.Machine.
func (m *machine) PostRegAlloc() {
m.setupPrologue()
m.postRegAlloc()
}
// setupPrologue initializes the prologue of the function.
func (m *machine) setupPrologue() {
ectx := m.executableContext
cur := ectx.RootInstr
prevInitInst := cur.next
//
// (high address) (high address)
// SP----> +-----------------+ +------------------+ <----+
// | ....... | | ....... | |
// | ret Y | | ret Y | |
// | ....... | | ....... | |
// | ret 0 | | ret 0 | |
// | arg X | | arg X | | size_of_arg_ret.
// | ....... | ====> | ....... | |
// | arg 1 | | arg 1 | |
// | arg 0 | | arg 0 | <----+
// |-----------------| | size_of_arg_ret |
// | return address |
// +------------------+ <---- SP
// (low address) (low address)
// Saves the return address (lr) and the size_of_arg_ret below the SP.
// size_of_arg_ret is used for stack unwinding.
cur = m.createReturnAddrAndSizeOfArgRetSlot(cur)
if !m.stackBoundsCheckDisabled {
cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur)
}
// Decrement SP if spillSlotSize > 0.
if m.spillSlotSize == 0 && len(m.spillSlots) != 0 {
panic(fmt.Sprintf("BUG: spillSlotSize=%d, spillSlots=%v\n", m.spillSlotSize, m.spillSlots))
}
if regs := m.clobberedRegs; len(regs) > 0 {
//
// (high address) (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | size_of_arg_ret | | size_of_arg_ret |
// | ReturnAddress | | ReturnAddress |
// SP----> +-----------------+ ====> +-----------------+
// (low address) | clobbered M |
// | ............ |
// | clobbered 0 |
// +-----------------+ <----- SP
// (low address)
//
_amode := addressModePreOrPostIndex(spVReg,
-16, // stack pointer must be 16-byte aligned.
true, // Decrement before store.
)
for _, vr := range regs {
// TODO: pair stores to reduce the number of instructions.
store := m.allocateInstr()
store.asStore(operandNR(vr), _amode, regTypeToRegisterSizeInBits(vr.RegType()))
cur = linkInstr(cur, store)
}
}
if size := m.spillSlotSize; size > 0 {
// Check if size is 16-byte aligned.
if size&0xf != 0 {
panic(fmt.Errorf("BUG: spill slot size %d is not 16-byte aligned", size))
}
cur = m.addsAddOrSubStackPointer(cur, spVReg, size, false)
// At this point, the stack looks like:
//
// (high address)
// +------------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | size_of_arg_ret |
// | ReturnAddress |
// +------------------+
// | clobbered M |
// | ............ |
// | clobbered 0 |
// | spill slot N |
// | ............ |
// | spill slot 2 |
// | spill slot 0 |
// SP----> +------------------+
// (low address)
}
// We push the frame size onto the stack to make it possible to unwind the stack:
//
//
// (high address) (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | size_of_arg_ret | | size_of_arg_ret |
// | ReturnAddress | | ReturnAddress |
// +-----------------+ ==> +-----------------+ <----+
// | clobbered M | | clobbered M | |
// | ............ | | ............ | |
// | clobbered 2 | | clobbered 2 | |
// | clobbered 1 | | clobbered 1 | | frame size
// | clobbered 0 | | clobbered 0 | |
// | spill slot N | | spill slot N | |
// | ............ | | ............ | |
// | spill slot 0 | | spill slot 0 | <----+
// SP---> +-----------------+ | xxxxxx | ;; unused space to make it 16-byte aligned.
// | frame_size |
// +-----------------+ <---- SP
// (low address)
//
cur = m.createFrameSizeSlot(cur, m.frameSize())
linkInstr(cur, prevInitInst)
}
func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruction {
// First we decrement the stack pointer to point to the arg0 slot.
var sizeOfArgRetReg regalloc.VReg
s := int64(m.currentABI.AlignedArgResultStackSlotSize())
if s > 0 {
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s)
sizeOfArgRetReg = tmpRegVReg
subSp := m.allocateInstr()
subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true)
cur = linkInstr(cur, subSp)
} else {
sizeOfArgRetReg = xzrVReg
}
// Saves the return address (lr) and the size_of_arg_ret below the SP.
// size_of_arg_ret is used for stack unwinding.
pstr := m.allocateInstr()
amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */)
pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode)
cur = linkInstr(cur, pstr)
return cur
}
func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction {
var frameSizeReg regalloc.VReg
if s > 0 {
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s)
frameSizeReg = tmpRegVReg
} else {
frameSizeReg = xzrVReg
}
_amode := addressModePreOrPostIndex(spVReg,
-16, // stack pointer must be 16-byte aligned.
true, // Decrement before store.
)
store := m.allocateInstr()
store.asStore(operandNR(frameSizeReg), _amode, 64)
cur = linkInstr(cur, store)
return cur
}
// postRegAlloc does multiple things while walking through the instructions:
// 1. Removes redundant copy instructions.
// 2. Inserts the epilogue.
func (m *machine) postRegAlloc() {
ectx := m.executableContext
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
switch cur.kind {
case ret:
m.setupEpilogueAfter(cur.prev)
case loadConstBlockArg:
lc := cur
next := lc.next
m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0]
m.lowerLoadConstantBlockArgAfterRegAlloc(lc)
for _, instr := range m.executableContext.PendingInstructions {
cur = linkInstr(cur, instr)
}
linkInstr(cur, next)
m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0]
default:
// Removes the redundant copy instruction.
if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() {
prev, next := cur.prev, cur.next
// Remove the copy instruction.
prev.next = next
if next != nil {
next.prev = prev
}
}
}
}
}
func (m *machine) setupEpilogueAfter(cur *instruction) {
prevNext := cur.next
// We've stored the frame size in the prologue, and now that we are about to return from this function, we won't need it anymore.
cur = m.addsAddOrSubStackPointer(cur, spVReg, 16, true)
if s := m.spillSlotSize; s > 0 {
// Adjust SP to the original value:
//
// (high address) (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | xxxxx | | xxxxx |
// | ReturnAddress | | ReturnAddress |
// +-----------------+ ====> +-----------------+
// | clobbered M | | clobbered M |
// | ............ | | ............ |
// | clobbered 1 | | clobbered 1 |
// | clobbered 0 | | clobbered 0 |
// | spill slot N | +-----------------+ <---- SP
// | ............ |
// | spill slot 0 |
// SP---> +-----------------+
// (low address)
//
cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
}
// First we need to restore the clobbered registers.
if len(m.clobberedRegs) > 0 {
// (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | xxxxx | | xxxxx |
// | ReturnAddress | | ReturnAddress |
// +-----------------+ ========> +-----------------+ <---- SP
// | clobbered M |
// | ........... |
// | clobbered 1 |
// | clobbered 0 |
// SP---> +-----------------+
// (low address)
l := len(m.clobberedRegs) - 1
for i := range m.clobberedRegs {
vr := m.clobberedRegs[l-i] // reverse order to restore.
load := m.allocateInstr()
amode := addressModePreOrPostIndex(spVReg,
16, // stack pointer must be 16-byte aligned.
false, // Increment after load.
)
// TODO: pair loads to reduce the number of instructions.
switch regTypeToRegisterSizeInBits(vr.RegType()) {
case 64: // restore int reg.
load.asULoad(operandNR(vr), amode, 64)
case 128: // restore vector reg.
load.asFpuLoad(operandNR(vr), amode, 128)
}
cur = linkInstr(cur, load)
}
}
// Reload the return address (lr).
//
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | ===> | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | xxxxx | +-----------------+ <---- SP
// | ReturnAddress |
// SP----> +-----------------+
ldr := m.allocateInstr()
ldr.asULoad(operandNR(lrVReg),
addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
cur = linkInstr(cur, ldr)
if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 {
cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
}
linkInstr(cur, prevNext)
}
// saveRequiredRegs is the set of registers that must be saved/restored while growing the stack when there's insufficient
// stack space left. Basically this is the combination of CalleeSavedRegisters plus argument registers except for x0,
// which always points to the execution context whenever the native code is entered from Go.
var saveRequiredRegs = []regalloc.VReg{
x1VReg, x2VReg, x3VReg, x4VReg, x5VReg, x6VReg, x7VReg,
x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg, lrVReg,
v0VReg, v1VReg, v2VReg, v3VReg, v4VReg, v5VReg, v6VReg, v7VReg,
v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg,
}
// insertStackBoundsCheck will insert the instructions after `cur` to check the
// stack bounds, and if there isn't sufficient space for the function,
// exit the execution and try growing the stack in the Go world.
//
// TODO: we should be able to share the instructions across all the functions to reduce the size of compiled executable.
func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction {
if requiredStackSize%16 != 0 {
panic("BUG")
}
if imm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok {
// sub tmp, sp, #requiredStackSize
sub := m.allocateInstr()
sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), imm12op, true)
cur = linkInstr(cur, sub)
} else {
// This case, we first load the requiredStackSize into the temporary register,
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
// Then subtract it.
sub := m.allocateInstr()
sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true)
cur = linkInstr(cur, sub)
}
tmp2 := x11VReg // Caller save, so it is safe to use it here in the prologue.
// ldr tmp2, [executionContext #StackBottomPtr]
ldr := m.allocateInstr()
ldr.asULoad(operandNR(tmp2), addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: x0VReg, // execution context is always the first argument.
imm: wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(),
}, 64)
cur = linkInstr(cur, ldr)
// subs xzr, tmp, tmp2
subs := m.allocateInstr()
subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true)
cur = linkInstr(cur, subs)
// b.ge #imm
cbr := m.allocateInstr()
cbr.asCondBr(ge.asCond(), labelInvalid, false /* ignored */)
cur = linkInstr(cur, cbr)
// Set the required stack size and set it to the exec context.
{
// First load the requiredStackSize into the temporary register,
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
setRequiredStackSize := m.allocateInstr()
setRequiredStackSize.asStore(operandNR(tmpRegVReg),
addressMode{
kind: addressModeKindRegUnsignedImm12,
// Execution context is always the first argument.
rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
}, 64)
cur = linkInstr(cur, setRequiredStackSize)
}
ldrAddress := m.allocateInstr()
ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: x0VReg, // execution context is always the first argument
imm: wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(),
}, 64)
cur = linkInstr(cur, ldrAddress)
// Then jumps to the stack grow call sequence's address, meaning
// transferring the control to the code compiled by CompileStackGrowCallSequence.
bl := m.allocateInstr()
bl.asCallIndirect(tmpRegVReg, nil)
cur = linkInstr(cur, bl)
// Now that we know the entire code, we can finalize how many bytes
// we have to skip when the stack size is sufficient.
var cbrOffset int64
for _cur := cbr; ; _cur = _cur.next {
cbrOffset += _cur.size()
if _cur == cur {
break
}
}
cbr.condBrOffsetResolve(cbrOffset)
return cur
}
// CompileStackGrowCallSequence implements backend.Machine.
func (m *machine) CompileStackGrowCallSequence() []byte {
ectx := m.executableContext
cur := m.allocateInstr()
cur.asNop0()
ectx.RootInstr = cur
// Save the callee saved and argument registers.
cur = m.saveRegistersInExecutionContext(cur, saveRequiredRegs)
// Save the current stack pointer.
cur = m.saveCurrentStackPointer(cur, x0VReg)
// Set the exit status on the execution context.
cur = m.setExitCode(cur, x0VReg, wazevoapi.ExitCodeGrowStack)
// Exit the execution.
cur = m.storeReturnAddressAndExit(cur)
// After the exit, restore the saved registers.
cur = m.restoreRegistersInExecutionContext(cur, saveRequiredRegs)
// Then go back to the original address of this stack grow call.
ret := m.allocateInstr()
ret.asRet()
linkInstr(cur, ret)
m.encode(ectx.RootInstr)
return m.compiler.Buf()
}
func (m *machine) addsAddOrSubStackPointer(cur *instruction, rd regalloc.VReg, diff int64, add bool) *instruction {
ectx := m.executableContext
ectx.PendingInstructions = ectx.PendingInstructions[:0]
m.insertAddOrSubStackPointer(rd, diff, add)
for _, inserted := range ectx.PendingInstructions {
cur = linkInstr(cur, inserted)
}
return cur
}


@ -0,0 +1,152 @@
package arm64
// This file implements the interfaces required for register allocations. See backend.RegAllocFunctionMachine.
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// ClobberedRegisters implements backend.RegAllocFunctionMachine.
func (m *machine) ClobberedRegisters(regs []regalloc.VReg) {
m.clobberedRegs = append(m.clobberedRegs[:0], regs...)
}
// Swap implements backend.RegAllocFunctionMachine.
func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {
prevNext := cur.next
var mov1, mov2, mov3 *instruction
if x1.RegType() == regalloc.RegTypeInt {
if !tmp.Valid() {
tmp = tmpRegVReg
}
mov1 = m.allocateInstr().asMove64(tmp, x1)
mov2 = m.allocateInstr().asMove64(x1, x2)
mov3 = m.allocateInstr().asMove64(x2, tmp)
cur = linkInstr(cur, mov1)
cur = linkInstr(cur, mov2)
cur = linkInstr(cur, mov3)
linkInstr(cur, prevNext)
} else {
if !tmp.Valid() {
r2 := x2.RealReg()
// Temporarily spill x1 to stack.
cur = m.InsertStoreRegisterAt(x1, cur, true).prev
// Then move x2 to x1.
cur = linkInstr(cur, m.allocateInstr().asFpuMov128(x1, x2))
linkInstr(cur, prevNext)
// Then reload the original value on x1 from stack to r2.
m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true)
} else {
mov1 = m.allocateInstr().asFpuMov128(tmp, x1)
mov2 = m.allocateInstr().asFpuMov128(x1, x2)
mov3 = m.allocateInstr().asFpuMov128(x2, tmp)
cur = linkInstr(cur, mov1)
cur = linkInstr(cur, mov2)
cur = linkInstr(cur, mov3)
linkInstr(cur, prevNext)
}
}
}
// InsertMoveBefore implements backend.RegAllocFunctionMachine.
func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) {
typ := src.RegType()
if typ != dst.RegType() {
panic("BUG: src and dst must have the same type")
}
mov := m.allocateInstr()
if typ == regalloc.RegTypeInt {
mov.asMove64(dst, src)
} else {
mov.asFpuMov128(dst, src)
}
cur := instr.prev
prevNext := cur.next
cur = linkInstr(cur, mov)
linkInstr(cur, prevNext)
}
// SSABlockLabel implements backend.RegAllocFunctionMachine.
func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label {
return m.executableContext.SsaBlockIDToLabels[id]
}
// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine.
func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
if !v.IsRealReg() {
panic("BUG: VReg must be backed by real reg to be stored")
}
typ := m.compiler.TypeOf(v)
var prevNext, cur *instruction
if after {
cur, prevNext = instr, instr.next
} else {
cur, prevNext = instr.prev, instr
}
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
var amode addressMode
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
store := m.allocateInstr()
store.asStore(operandNR(v), amode, typ.Bits())
cur = linkInstr(cur, store)
return linkInstr(cur, prevNext)
}
// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine.
func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
if !v.IsRealReg() {
panic("BUG: VReg must be backed by real reg to be stored")
}
typ := m.compiler.TypeOf(v)
var prevNext, cur *instruction
if after {
cur, prevNext = instr, instr.next
} else {
cur, prevNext = instr.prev, instr
}
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
var amode addressMode
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
load := m.allocateInstr()
switch typ {
case ssa.TypeI32, ssa.TypeI64:
load.asULoad(operandNR(v), amode, typ.Bits())
case ssa.TypeF32, ssa.TypeF64:
load.asFpuLoad(operandNR(v), amode, typ.Bits())
case ssa.TypeV128:
load.asFpuLoad(operandNR(v), amode, 128)
default:
panic("TODO")
}
cur = linkInstr(cur, load)
return linkInstr(cur, prevNext)
}
// LastInstrForInsertion implements backend.RegAllocFunctionMachine.
func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction {
cur := end
for cur.kind == nop0 {
cur = cur.prev
if cur == begin {
return end
}
}
switch cur.kind {
case br:
return cur
default:
return end
}
}


@ -0,0 +1,117 @@
package arm64
import (
"encoding/binary"
"fmt"
"math"
"sort"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
)
const (
// trampolineCallSize is the size of the trampoline instruction sequence for each function in an island.
trampolineCallSize = 4*4 + 4 // Four instructions + 32-bit immediate.
// Unconditional branch offset is encoded as divided by 4 in imm26.
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en
maxUnconditionalBranchOffset = maxSignedInt26 * 4
minUnconditionalBranchOffset = minSignedInt26 * 4
// trampolineIslandInterval is the interval between trampoline islands.
// It is half the unconditional branch range, so any call site can reach its nearest island within that range.
trampolineIslandInterval = maxUnconditionalBranchOffset / 2
// maxNumFunctions explicitly specifies the maximum number of functions that can be allowed in a single executable.
maxNumFunctions = trampolineIslandInterval >> 6
// maxFunctionExecutableSize is the maximum size of a function that can exist in a trampoline island.
// Conservatively set to 1/4 of the trampoline island interval.
maxFunctionExecutableSize = trampolineIslandInterval >> 2
)
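// Illustrative sketch, not part of the original file: assuming maxSignedInt26 is
// (1<<25)-1 per the AArch64 imm26 encoding, an unconditional branch reaches roughly
// +/-128MiB, so islands are spaced ~64MiB apart, a single island can serve up to
// ~1M functions, and each function body may be at most ~16MiB of machine code.
func approxTrampolineBudget() (interval, maxFns, maxFnSize int) {
	return trampolineIslandInterval, maxNumFunctions, maxFunctionExecutableSize
}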
// CallTrampolineIslandInfo implements backend.Machine CallTrampolineIslandInfo.
func (m *machine) CallTrampolineIslandInfo(numFunctions int) (interval, size int, err error) {
if numFunctions > maxNumFunctions {
return 0, 0, fmt.Errorf("too many functions: %d > %d", numFunctions, maxNumFunctions)
}
return trampolineIslandInterval, trampolineCallSize * numFunctions, nil
}
// ResolveRelocations implements backend.Machine ResolveRelocations.
func (m *machine) ResolveRelocations(
refToBinaryOffset []int,
executable []byte,
relocations []backend.RelocationInfo,
callTrampolineIslandOffsets []int,
) {
for _, islandOffset := range callTrampolineIslandOffsets {
encodeCallTrampolineIsland(refToBinaryOffset, islandOffset, executable)
}
for _, r := range relocations {
instrOffset := r.Offset
calleeFnOffset := refToBinaryOffset[r.FuncRef]
diff := int64(calleeFnOffset) - (instrOffset)
// Check if the diff is within the range of the branch instruction.
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
// Find the nearest trampoline island in callTrampolineIslandOffsets.
islandOffset := searchTrampolineIsland(callTrampolineIslandOffsets, int(instrOffset))
islandTargetOffset := islandOffset + trampolineCallSize*int(r.FuncRef)
diff = int64(islandTargetOffset) - (instrOffset)
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
panic("BUG in trampoline placement")
}
}
binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(true, diff))
}
}
// encodeCallTrampolineIsland encodes a trampoline island for the given functions.
// Each island consists of a trampoline instruction sequence for each function.
// Each trampoline instruction sequence consists of 4 instructions + 32-bit immediate.
func encodeCallTrampolineIsland(refToBinaryOffset []int, islandOffset int, executable []byte) {
for i := 0; i < len(refToBinaryOffset); i++ {
trampolineOffset := islandOffset + trampolineCallSize*i
fnOffset := refToBinaryOffset[i]
diff := fnOffset - (trampolineOffset + 16)
if diff > math.MaxInt32 || diff < math.MinInt32 {
// Even amd64 can't handle this case: a binary spanning more than 4GB is too big.
panic("too big binary")
}
// tmpReg and tmpReg2 are safe to overwrite (in fact, any caller-saved register is safe to use).
tmpReg, tmpReg2 := regNumberInEncoding[tmpRegVReg.RealReg()], regNumberInEncoding[x11]
// adr tmpReg, PC+16: load the address of #diff into tmpReg.
binary.LittleEndian.PutUint32(executable[trampolineOffset:], encodeAdr(tmpReg, 16))
// ldrsw tmpReg2, [tmpReg]: Load #diff into tmpReg2.
binary.LittleEndian.PutUint32(executable[trampolineOffset+4:],
encodeLoadOrStore(sLoad32, tmpReg2, addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpRegVReg}))
// add tmpReg, tmpReg2, tmpReg: add #diff to the address of #diff, getting the absolute address of the function.
binary.LittleEndian.PutUint32(executable[trampolineOffset+8:],
encodeAluRRR(aluOpAdd, tmpReg, tmpReg, tmpReg2, true, false))
// br tmpReg: branch to the function without overwriting the link register.
binary.LittleEndian.PutUint32(executable[trampolineOffset+12:], encodeUnconditionalBranchReg(tmpReg, false))
// #diff
binary.LittleEndian.PutUint32(executable[trampolineOffset+16:], uint32(diff))
}
}
// searchTrampolineIsland finds the nearest trampoline island from callTrampolineIslandOffsets.
// Note that even if the offset is in the middle of two islands, it returns the latter one.
// That is ok because the island is always placed in the middle of the range.
//
// precondition: callTrampolineIslandOffsets is sorted in ascending order.
func searchTrampolineIsland(callTrampolineIslandOffsets []int, offset int) int {
l := len(callTrampolineIslandOffsets)
n := sort.Search(l, func(i int) bool {
return callTrampolineIslandOffsets[i] >= offset
})
if n == l {
n = l - 1
}
return callTrampolineIslandOffsets[n]
}
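// Illustrative sketch, not part of the original file: searchTrampolineIsland returns
// the first island at or after the offset, clamping to the last island when the
// offset lies beyond it. The island offsets below are purely hypothetical.
func searchTrampolineIslandExample() [3]int {
	islands := []int{100, 200} // sorted ascending, as the precondition requires
	return [3]int{
		searchTrampolineIsland(islands, 50),  // 100: first island at or after 50
		searchTrampolineIsland(islands, 150), // 200: between two islands, the later one wins
		searchTrampolineIsland(islands, 250), // 200: past the end clamps to the last island
	}
}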

View File

@ -0,0 +1,397 @@
package arm64
import (
"fmt"
"strconv"
"strings"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
)
// Arm64-specific registers.
//
// See https://developer.arm.com/documentation/dui0801/a/Overview-of-AArch64-state/Predeclared-core-register-names-in-AArch64-state
const (
// General purpose registers. Note that we do not distinguish wn and xn registers
// because they are the same from the perspective of the register allocator, and
// the size can be determined by the type of the instruction.
x0 = regalloc.RealRegInvalid + 1 + iota
x1
x2
x3
x4
x5
x6
x7
x8
x9
x10
x11
x12
x13
x14
x15
x16
x17
x18
x19
x20
x21
x22
x23
x24
x25
x26
x27
x28
x29
x30
// Vector registers. Note that we do not distinguish vn and dn, ... registers
// because they are the same from the perspective of the register allocator, and
// the size can be determined by the type of the instruction.
v0
v1
v2
v3
v4
v5
v6
v7
v8
v9
v10
v11
v12
v13
v14
v15
v16
v17
v18
v19
v20
v21
v22
v23
v24
v25
v26
v27
v28
v29
v30
v31
// Special registers
xzr
sp
lr = x30
fp = x29
tmp = x27
)
var (
x0VReg = regalloc.FromRealReg(x0, regalloc.RegTypeInt)
x1VReg = regalloc.FromRealReg(x1, regalloc.RegTypeInt)
x2VReg = regalloc.FromRealReg(x2, regalloc.RegTypeInt)
x3VReg = regalloc.FromRealReg(x3, regalloc.RegTypeInt)
x4VReg = regalloc.FromRealReg(x4, regalloc.RegTypeInt)
x5VReg = regalloc.FromRealReg(x5, regalloc.RegTypeInt)
x6VReg = regalloc.FromRealReg(x6, regalloc.RegTypeInt)
x7VReg = regalloc.FromRealReg(x7, regalloc.RegTypeInt)
x8VReg = regalloc.FromRealReg(x8, regalloc.RegTypeInt)
x9VReg = regalloc.FromRealReg(x9, regalloc.RegTypeInt)
x10VReg = regalloc.FromRealReg(x10, regalloc.RegTypeInt)
x11VReg = regalloc.FromRealReg(x11, regalloc.RegTypeInt)
x12VReg = regalloc.FromRealReg(x12, regalloc.RegTypeInt)
x13VReg = regalloc.FromRealReg(x13, regalloc.RegTypeInt)
x14VReg = regalloc.FromRealReg(x14, regalloc.RegTypeInt)
x15VReg = regalloc.FromRealReg(x15, regalloc.RegTypeInt)
x16VReg = regalloc.FromRealReg(x16, regalloc.RegTypeInt)
x17VReg = regalloc.FromRealReg(x17, regalloc.RegTypeInt)
x18VReg = regalloc.FromRealReg(x18, regalloc.RegTypeInt)
x19VReg = regalloc.FromRealReg(x19, regalloc.RegTypeInt)
x20VReg = regalloc.FromRealReg(x20, regalloc.RegTypeInt)
x21VReg = regalloc.FromRealReg(x21, regalloc.RegTypeInt)
x22VReg = regalloc.FromRealReg(x22, regalloc.RegTypeInt)
x23VReg = regalloc.FromRealReg(x23, regalloc.RegTypeInt)
x24VReg = regalloc.FromRealReg(x24, regalloc.RegTypeInt)
x25VReg = regalloc.FromRealReg(x25, regalloc.RegTypeInt)
x26VReg = regalloc.FromRealReg(x26, regalloc.RegTypeInt)
x27VReg = regalloc.FromRealReg(x27, regalloc.RegTypeInt)
x28VReg = regalloc.FromRealReg(x28, regalloc.RegTypeInt)
x29VReg = regalloc.FromRealReg(x29, regalloc.RegTypeInt)
x30VReg = regalloc.FromRealReg(x30, regalloc.RegTypeInt)
v0VReg = regalloc.FromRealReg(v0, regalloc.RegTypeFloat)
v1VReg = regalloc.FromRealReg(v1, regalloc.RegTypeFloat)
v2VReg = regalloc.FromRealReg(v2, regalloc.RegTypeFloat)
v3VReg = regalloc.FromRealReg(v3, regalloc.RegTypeFloat)
v4VReg = regalloc.FromRealReg(v4, regalloc.RegTypeFloat)
v5VReg = regalloc.FromRealReg(v5, regalloc.RegTypeFloat)
v6VReg = regalloc.FromRealReg(v6, regalloc.RegTypeFloat)
v7VReg = regalloc.FromRealReg(v7, regalloc.RegTypeFloat)
v8VReg = regalloc.FromRealReg(v8, regalloc.RegTypeFloat)
v9VReg = regalloc.FromRealReg(v9, regalloc.RegTypeFloat)
v10VReg = regalloc.FromRealReg(v10, regalloc.RegTypeFloat)
v11VReg = regalloc.FromRealReg(v11, regalloc.RegTypeFloat)
v12VReg = regalloc.FromRealReg(v12, regalloc.RegTypeFloat)
v13VReg = regalloc.FromRealReg(v13, regalloc.RegTypeFloat)
v14VReg = regalloc.FromRealReg(v14, regalloc.RegTypeFloat)
v15VReg = regalloc.FromRealReg(v15, regalloc.RegTypeFloat)
v16VReg = regalloc.FromRealReg(v16, regalloc.RegTypeFloat)
v17VReg = regalloc.FromRealReg(v17, regalloc.RegTypeFloat)
v18VReg = regalloc.FromRealReg(v18, regalloc.RegTypeFloat)
v19VReg = regalloc.FromRealReg(v19, regalloc.RegTypeFloat)
v20VReg = regalloc.FromRealReg(v20, regalloc.RegTypeFloat)
v21VReg = regalloc.FromRealReg(v21, regalloc.RegTypeFloat)
v22VReg = regalloc.FromRealReg(v22, regalloc.RegTypeFloat)
v23VReg = regalloc.FromRealReg(v23, regalloc.RegTypeFloat)
v24VReg = regalloc.FromRealReg(v24, regalloc.RegTypeFloat)
v25VReg = regalloc.FromRealReg(v25, regalloc.RegTypeFloat)
v26VReg = regalloc.FromRealReg(v26, regalloc.RegTypeFloat)
v27VReg = regalloc.FromRealReg(v27, regalloc.RegTypeFloat)
// lr (link register) holds the return address at the function entry.
lrVReg = x30VReg
// tmpReg is used to perform spill/load on large stack offsets, and load large constants.
// Therefore, be cautious to use this register in the middle of the compilation, especially before the register allocation.
// This is the same as golang/go, but it's only described in the source code:
// https://github.com/golang/go/blob/18e17e2cb12837ea2c8582ecdb0cc780f49a1aac/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go#L59
// https://github.com/golang/go/blob/18e17e2cb12837ea2c8582ecdb0cc780f49a1aac/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go#L13-L15
tmpRegVReg = regalloc.FromRealReg(tmp, regalloc.RegTypeInt)
v28VReg = regalloc.FromRealReg(v28, regalloc.RegTypeFloat)
v29VReg = regalloc.FromRealReg(v29, regalloc.RegTypeFloat)
v30VReg = regalloc.FromRealReg(v30, regalloc.RegTypeFloat)
v31VReg = regalloc.FromRealReg(v31, regalloc.RegTypeFloat)
xzrVReg = regalloc.FromRealReg(xzr, regalloc.RegTypeInt)
spVReg = regalloc.FromRealReg(sp, regalloc.RegTypeInt)
fpVReg = regalloc.FromRealReg(fp, regalloc.RegTypeInt)
)
var regNames = [...]string{
x0: "x0",
x1: "x1",
x2: "x2",
x3: "x3",
x4: "x4",
x5: "x5",
x6: "x6",
x7: "x7",
x8: "x8",
x9: "x9",
x10: "x10",
x11: "x11",
x12: "x12",
x13: "x13",
x14: "x14",
x15: "x15",
x16: "x16",
x17: "x17",
x18: "x18",
x19: "x19",
x20: "x20",
x21: "x21",
x22: "x22",
x23: "x23",
x24: "x24",
x25: "x25",
x26: "x26",
x27: "x27",
x28: "x28",
x29: "x29",
x30: "x30",
xzr: "xzr",
sp: "sp",
v0: "v0",
v1: "v1",
v2: "v2",
v3: "v3",
v4: "v4",
v5: "v5",
v6: "v6",
v7: "v7",
v8: "v8",
v9: "v9",
v10: "v10",
v11: "v11",
v12: "v12",
v13: "v13",
v14: "v14",
v15: "v15",
v16: "v16",
v17: "v17",
v18: "v18",
v19: "v19",
v20: "v20",
v21: "v21",
v22: "v22",
v23: "v23",
v24: "v24",
v25: "v25",
v26: "v26",
v27: "v27",
v28: "v28",
v29: "v29",
v30: "v30",
v31: "v31",
}
func formatVRegSized(r regalloc.VReg, size byte) (ret string) {
if r.IsRealReg() {
ret = regNames[r.RealReg()]
switch ret[0] {
case 'x':
switch size {
case 32:
ret = strings.Replace(ret, "x", "w", 1)
case 64:
default:
panic("BUG: invalid register size: " + strconv.Itoa(int(size)))
}
case 'v':
switch size {
case 32:
ret = strings.Replace(ret, "v", "s", 1)
case 64:
ret = strings.Replace(ret, "v", "d", 1)
case 128:
ret = strings.Replace(ret, "v", "q", 1)
default:
panic("BUG: invalid register size")
}
}
} else {
switch r.RegType() {
case regalloc.RegTypeInt:
switch size {
case 32:
ret = fmt.Sprintf("w%d?", r.ID())
case 64:
ret = fmt.Sprintf("x%d?", r.ID())
default:
panic("BUG: invalid register size: " + strconv.Itoa(int(size)))
}
case regalloc.RegTypeFloat:
switch size {
case 32:
ret = fmt.Sprintf("s%d?", r.ID())
case 64:
ret = fmt.Sprintf("d%d?", r.ID())
case 128:
ret = fmt.Sprintf("q%d?", r.ID())
default:
panic("BUG: invalid register size")
}
default:
panic(fmt.Sprintf("BUG: invalid register type: %d for %s", r.RegType(), r))
}
}
return
}
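// Illustrative sketch, not part of the original file: formatVRegSized derives the
// assembly register prefix from the requested size, e.g. a 32-bit view of x2 is
// "w2" and a 128-bit view of v3 is "q3"; virtual registers print with a trailing "?".
func formatVRegSizedExamples() []string {
	return []string{
		formatVRegSized(x2VReg, 32),  // "w2"
		formatVRegSized(x2VReg, 64),  // "x2"
		formatVRegSized(v3VReg, 128), // "q3"
	}
}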
func formatVRegWidthVec(r regalloc.VReg, width vecArrangement) (ret string) {
var id string
wspec := strings.ToLower(width.String())
if r.IsRealReg() {
id = regNames[r.RealReg()][1:]
} else {
id = fmt.Sprintf("%d?", r.ID())
}
ret = fmt.Sprintf("%s%s", wspec, id)
return
}
func formatVRegVec(r regalloc.VReg, arr vecArrangement, index vecIndex) (ret string) {
id := fmt.Sprintf("v%d?", r.ID())
if r.IsRealReg() {
id = regNames[r.RealReg()]
}
ret = fmt.Sprintf("%s.%s", id, strings.ToLower(arr.String()))
if index != vecIndexNone {
ret += fmt.Sprintf("[%d]", index)
}
return
}
func regTypeToRegisterSizeInBits(r regalloc.RegType) byte {
switch r {
case regalloc.RegTypeInt:
return 64
case regalloc.RegTypeFloat:
return 128
default:
panic("BUG: invalid register type")
}
}
var regNumberInEncoding = [...]uint32{
x0: 0,
x1: 1,
x2: 2,
x3: 3,
x4: 4,
x5: 5,
x6: 6,
x7: 7,
x8: 8,
x9: 9,
x10: 10,
x11: 11,
x12: 12,
x13: 13,
x14: 14,
x15: 15,
x16: 16,
x17: 17,
x18: 18,
x19: 19,
x20: 20,
x21: 21,
x22: 22,
x23: 23,
x24: 24,
x25: 25,
x26: 26,
x27: 27,
x28: 28,
x29: 29,
x30: 30,
xzr: 31,
sp: 31,
v0: 0,
v1: 1,
v2: 2,
v3: 3,
v4: 4,
v5: 5,
v6: 6,
v7: 7,
v8: 8,
v9: 9,
v10: 10,
v11: 11,
v12: 12,
v13: 13,
v14: 14,
v15: 15,
v16: 16,
v17: 17,
v18: 18,
v19: 19,
v20: 20,
v21: 21,
v22: 22,
v23: 23,
v24: 24,
v25: 25,
v26: 26,
v27: 27,
v28: 28,
v29: 29,
v30: 30,
v31: 31,
}
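// Note (added for clarity, not part of the original file): xzr and sp intentionally
// share encoding 31; in AArch64 the meaning of register number 31 (zero register vs.
// stack pointer) is determined by the instruction, not by this table.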

View File

@ -0,0 +1,90 @@
package arm64
import (
"encoding/binary"
"reflect"
"unsafe"
"github.com/tetratelabs/wazero/internal/wasmdebug"
)
// UnwindStack implements wazevo.unwindStack.
func UnwindStack(sp, _, top uintptr, returnAddresses []uintptr) []uintptr {
l := int(top - sp)
var stackBuf []byte
{
// TODO: use unsafe.Slice after floor version is set to Go 1.20.
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf))
hdr.Data = sp
hdr.Len = l
hdr.Cap = l
}
for i := uint64(0); i < uint64(l); {
// (high address)
// +-----------------+
// | ....... |
// | ret Y | <----+
// | ....... | |
// | ret 0 | |
// | arg X | | size_of_arg_ret
// | ....... | |
// | arg 1 | |
// | arg 0 | <----+
// | size_of_arg_ret |
// | ReturnAddress |
// +-----------------+ <----+
// | ........... | |
// | spill slot M | |
// | ............ | |
// | spill slot 2 | |
// | spill slot 1 | | frame size
// | clobbered N | |
// | ............ | |
// | clobbered 0 | <----+
// | xxxxxx | ;; unused space to make it 16-byte aligned.
// | frame_size |
// +-----------------+ <---- SP
// (low address)
frameSize := binary.LittleEndian.Uint64(stackBuf[i:])
i += frameSize +
16 // frame size + aligned space.
retAddr := binary.LittleEndian.Uint64(stackBuf[i:])
i += 8 // ret addr.
sizeOfArgRet := binary.LittleEndian.Uint64(stackBuf[i:])
i += 8 + sizeOfArgRet
returnAddresses = append(returnAddresses, uintptr(retAddr))
if len(returnAddresses) == wasmdebug.MaxFrames {
break
}
}
return returnAddresses
}
// GoCallStackView implements wazevo.goCallStackView.
func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
// (high address)
// +-----------------+ <----+
// | xxxxxxxxxxx | | ;; optional unused space to make it 16-byte aligned.
// ^ | arg[N]/ret[M] | |
// sliceSize | | ............ | | sliceSize
// | | arg[1]/ret[1] | |
// v | arg[0]/ret[0] | <----+
// | sliceSize |
// | frame_size |
// +-----------------+ <---- stackPointerBeforeGoCall
// (low address)
ptr := unsafe.Pointer(stackPointerBeforeGoCall)
size := *(*uint64)(unsafe.Add(ptr, 8))
var view []uint64
{
sh := (*reflect.SliceHeader)(unsafe.Pointer(&view))
sh.Data = uintptr(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize).
sh.Len = int(size)
sh.Cap = int(size)
}
return view
}

View File

@ -0,0 +1,100 @@
package backend
import (
"context"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
type (
// Machine is a backend for a specific ISA machine.
Machine interface {
ExecutableContext() ExecutableContext
// DisableStackCheck disables the stack check for the current compilation for debugging/testing.
DisableStackCheck()
// SetCurrentABI initializes the FunctionABI for the given signature.
SetCurrentABI(abi *FunctionABI)
// SetCompiler sets the compilation context used for the lifetime of Machine.
// This is only called once per Machine, i.e. before the first compilation.
SetCompiler(Compiler)
// LowerSingleBranch is called when the compilation of the given single branch is started.
LowerSingleBranch(b *ssa.Instruction)
// LowerConditionalBranch is called when the compilation of the given conditional branch is started.
LowerConditionalBranch(b *ssa.Instruction)
// LowerInstr is called for each instruction in the given block except for the ones marked as already lowered
// via Compiler.MarkLowered. The order is reverse, i.e. from the last instruction to the first one.
//
// Note: this can lower multiple instructions (which produce the inputs) at once whenever it's possible
// for optimization.
LowerInstr(*ssa.Instruction)
// Reset resets the machine state for the next compilation.
Reset()
// InsertMove inserts a move instruction from src to dst whose type is typ.
InsertMove(dst, src regalloc.VReg, typ ssa.Type)
// InsertReturn inserts the return instruction to return from the current function.
InsertReturn()
// InsertLoadConstantBlockArg inserts the instruction(s) to load the constant value into the given regalloc.VReg.
InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg)
// Format returns the string representation of the currently compiled machine code.
// This is only for testing purpose.
Format() string
// RegAlloc does the register allocation after lowering.
RegAlloc()
// PostRegAlloc does the post register allocation, e.g. setting up prologue/epilogue, redundant move elimination, etc.
PostRegAlloc()
// ResolveRelocations resolves the relocations after emitting machine code.
// * refToBinaryOffset: the map from the function reference (ssa.FuncRef) to the executable offset.
// * executable: the binary to resolve the relocations.
// * relocations: the relocations to resolve.
// * callTrampolineIslandOffsets: the offsets of the trampoline islands in the executable.
ResolveRelocations(
refToBinaryOffset []int,
executable []byte,
relocations []RelocationInfo,
callTrampolineIslandOffsets []int,
)
// Encode encodes the machine instructions to the Compiler.
Encode(ctx context.Context) error
// CompileGoFunctionTrampoline compiles the trampoline function to call a Go function of the given exit code and signature.
CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte
// CompileStackGrowCallSequence returns the sequence of instructions shared by all functions to
// call the stack grow builtin function.
CompileStackGrowCallSequence() []byte
// CompileEntryPreamble returns the sequence of instructions shared by multiple functions to
// enter the function from Go.
CompileEntryPreamble(signature *ssa.Signature) []byte
// LowerParams lowers the given parameters.
LowerParams(params []ssa.Value)
// LowerReturns lowers the given returns.
LowerReturns(returns []ssa.Value)
// ArgsResultsRegs returns the registers used for arguments and return values.
ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg)
// CallTrampolineIslandInfo returns the interval of the offset where the trampoline island is placed, and
// the size of the trampoline island. If islandSize is zero, the trampoline island is not used on this machine.
CallTrampolineIslandInfo(numFunctions int) (interval, islandSize int, err error)
}
)

View File

@ -0,0 +1,319 @@
package backend
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// RegAllocFunctionMachine is the interface for the machine specific logic that will be used in RegAllocFunction.
type RegAllocFunctionMachine[I regalloc.InstrConstraint] interface {
// InsertMoveBefore inserts the move instruction from src to dst before the given instruction.
InsertMoveBefore(dst, src regalloc.VReg, instr I)
// InsertStoreRegisterAt inserts the instruction(s) to store the given virtual register at the given instruction.
// If after is true, the instruction(s) will be inserted after the given instruction, otherwise before.
InsertStoreRegisterAt(v regalloc.VReg, instr I, after bool) I
// InsertReloadRegisterAt inserts the instruction(s) to reload the given virtual register at the given instruction.
// If after is true, the instruction(s) will be inserted after the given instruction, otherwise before.
InsertReloadRegisterAt(v regalloc.VReg, instr I, after bool) I
// ClobberedRegisters is called when the register allocation is done and the clobbered registers are known.
ClobberedRegisters(regs []regalloc.VReg)
// Swap swaps the two virtual registers after the given instruction.
Swap(cur I, x1, x2, tmp regalloc.VReg)
// LastInstrForInsertion implements LastInstrForInsertion of regalloc.Function. See its comment for details.
LastInstrForInsertion(begin, end I) I
// SSABlockLabel returns the label of the given ssa.BasicBlockID.
SSABlockLabel(id ssa.BasicBlockID) Label
}
type (
// RegAllocFunction implements regalloc.Function.
RegAllocFunction[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct {
m m
ssb ssa.Builder
c Compiler
// iter is the iterator for reversePostOrderBlocks
iter int
reversePostOrderBlocks []RegAllocBlock[I, m]
// labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks.
labelToRegAllocBlockIndex map[Label]int
loopNestingForestRoots []ssa.BasicBlock
}
// RegAllocBlock implements regalloc.Block.
RegAllocBlock[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct {
// f is the function this block belongs to. Used to reuse the regAllocFunctionImpl.predsSlice slice for Defs() and Uses().
f *RegAllocFunction[I, m]
sb ssa.BasicBlock
l Label
begin, end I
loopNestingForestChildren []ssa.BasicBlock
cur I
id int
cachedLastInstrForInsertion I
}
)
// NewRegAllocFunction returns a new RegAllocFunction.
func NewRegAllocFunction[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](m M, ssb ssa.Builder, c Compiler) *RegAllocFunction[I, M] {
return &RegAllocFunction[I, M]{
m: m,
ssb: ssb,
c: c,
labelToRegAllocBlockIndex: make(map[Label]int),
}
}
// AddBlock adds a new block to the function.
func (f *RegAllocFunction[I, M]) AddBlock(sb ssa.BasicBlock, l Label, begin, end I) {
i := len(f.reversePostOrderBlocks)
f.reversePostOrderBlocks = append(f.reversePostOrderBlocks, RegAllocBlock[I, M]{
f: f,
sb: sb,
l: l,
begin: begin,
end: end,
id: int(sb.ID()),
})
f.labelToRegAllocBlockIndex[l] = i
}
// Reset resets the function for the next compilation.
func (f *RegAllocFunction[I, M]) Reset() {
f.reversePostOrderBlocks = f.reversePostOrderBlocks[:0]
f.iter = 0
}
// StoreRegisterAfter implements regalloc.Function StoreRegisterAfter.
func (f *RegAllocFunction[I, M]) StoreRegisterAfter(v regalloc.VReg, instr regalloc.Instr) {
m := f.m
m.InsertStoreRegisterAt(v, instr.(I), true)
}
// ReloadRegisterBefore implements regalloc.Function ReloadRegisterBefore.
func (f *RegAllocFunction[I, M]) ReloadRegisterBefore(v regalloc.VReg, instr regalloc.Instr) {
m := f.m
m.InsertReloadRegisterAt(v, instr.(I), false)
}
// ReloadRegisterAfter implements regalloc.Function ReloadRegisterAfter.
func (f *RegAllocFunction[I, M]) ReloadRegisterAfter(v regalloc.VReg, instr regalloc.Instr) {
m := f.m
m.InsertReloadRegisterAt(v, instr.(I), true)
}
// StoreRegisterBefore implements regalloc.Function StoreRegisterBefore.
func (f *RegAllocFunction[I, M]) StoreRegisterBefore(v regalloc.VReg, instr regalloc.Instr) {
m := f.m
m.InsertStoreRegisterAt(v, instr.(I), false)
}
// ClobberedRegisters implements regalloc.Function ClobberedRegisters.
func (f *RegAllocFunction[I, M]) ClobberedRegisters(regs []regalloc.VReg) {
f.m.ClobberedRegisters(regs)
}
// SwapBefore implements regalloc.Function SwapBefore.
func (f *RegAllocFunction[I, M]) SwapBefore(x1, x2, tmp regalloc.VReg, instr regalloc.Instr) {
f.m.Swap(instr.Prev().(I), x1, x2, tmp)
}
// PostOrderBlockIteratorBegin implements regalloc.Function PostOrderBlockIteratorBegin.
func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorBegin() regalloc.Block {
f.iter = len(f.reversePostOrderBlocks) - 1
return f.PostOrderBlockIteratorNext()
}
// PostOrderBlockIteratorNext implements regalloc.Function PostOrderBlockIteratorNext.
func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorNext() regalloc.Block {
if f.iter < 0 {
return nil
}
b := &f.reversePostOrderBlocks[f.iter]
f.iter--
return b
}
// ReversePostOrderBlockIteratorBegin implements regalloc.Function ReversePostOrderBlockIteratorBegin.
func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorBegin() regalloc.Block {
f.iter = 0
return f.ReversePostOrderBlockIteratorNext()
}
// ReversePostOrderBlockIteratorNext implements regalloc.Function ReversePostOrderBlockIteratorNext.
func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorNext() regalloc.Block {
if f.iter >= len(f.reversePostOrderBlocks) {
return nil
}
b := &f.reversePostOrderBlocks[f.iter]
f.iter++
return b
}
// LoopNestingForestRoots implements regalloc.Function LoopNestingForestRoots.
func (f *RegAllocFunction[I, M]) LoopNestingForestRoots() int {
f.loopNestingForestRoots = f.ssb.LoopNestingForestRoots()
return len(f.loopNestingForestRoots)
}
// LoopNestingForestRoot implements regalloc.Function LoopNestingForestRoot.
func (f *RegAllocFunction[I, M]) LoopNestingForestRoot(i int) regalloc.Block {
blk := f.loopNestingForestRoots[i]
l := f.m.SSABlockLabel(blk.ID())
index := f.labelToRegAllocBlockIndex[l]
return &f.reversePostOrderBlocks[index]
}
// InsertMoveBefore implements regalloc.Function InsertMoveBefore.
func (f *RegAllocFunction[I, M]) InsertMoveBefore(dst, src regalloc.VReg, instr regalloc.Instr) {
f.m.InsertMoveBefore(dst, src, instr.(I))
}
// LowestCommonAncestor implements regalloc.Function LowestCommonAncestor.
func (f *RegAllocFunction[I, M]) LowestCommonAncestor(blk1, blk2 regalloc.Block) regalloc.Block {
ret := f.ssb.LowestCommonAncestor(blk1.(*RegAllocBlock[I, M]).sb, blk2.(*RegAllocBlock[I, M]).sb)
l := f.m.SSABlockLabel(ret.ID())
index := f.labelToRegAllocBlockIndex[l]
return &f.reversePostOrderBlocks[index]
}
// Idom implements regalloc.Function Idom.
func (f *RegAllocFunction[I, M]) Idom(blk regalloc.Block) regalloc.Block {
builder := f.ssb
idom := builder.Idom(blk.(*RegAllocBlock[I, M]).sb)
if idom == nil {
panic("BUG: idom must not be nil")
}
l := f.m.SSABlockLabel(idom.ID())
index := f.labelToRegAllocBlockIndex[l]
return &f.reversePostOrderBlocks[index]
}
// ID implements regalloc.Block.
func (r *RegAllocBlock[I, m]) ID() int32 { return int32(r.id) }
// BlockParams implements regalloc.Block.
func (r *RegAllocBlock[I, m]) BlockParams(regs *[]regalloc.VReg) []regalloc.VReg {
c := r.f.c
*regs = (*regs)[:0]
for i := 0; i < r.sb.Params(); i++ {
v := c.VRegOf(r.sb.Param(i))
*regs = append(*regs, v)
}
return *regs
}
// InstrIteratorBegin implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrIteratorBegin() regalloc.Instr {
r.cur = r.begin
return r.cur
}
// InstrIteratorNext implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrIteratorNext() regalloc.Instr {
for {
if r.cur == r.end {
return nil
}
instr := r.cur.Next()
r.cur = instr.(I)
if instr == nil {
return nil
} else if instr.AddedBeforeRegAlloc() {
// Only concerned about the instruction added before regalloc.
return instr
}
}
}
// InstrRevIteratorBegin implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrRevIteratorBegin() regalloc.Instr {
r.cur = r.end
return r.cur
}
// InstrRevIteratorNext implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrRevIteratorNext() regalloc.Instr {
for {
if r.cur == r.begin {
return nil
}
instr := r.cur.Prev()
r.cur = instr.(I)
if instr == nil {
return nil
} else if instr.AddedBeforeRegAlloc() {
// Only concerned about the instruction added before regalloc.
return instr
}
}
}
// FirstInstr implements regalloc.Block.
func (r *RegAllocBlock[I, m]) FirstInstr() regalloc.Instr {
return r.begin
}
// EndInstr implements regalloc.Block.
func (r *RegAllocBlock[I, m]) EndInstr() regalloc.Instr {
return r.end
}
// LastInstrForInsertion implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LastInstrForInsertion() regalloc.Instr {
var nil I // the zero value of I; shadows the predeclared nil because a type parameter value can't be compared to untyped nil directly.
if r.cachedLastInstrForInsertion == nil {
r.cachedLastInstrForInsertion = r.f.m.LastInstrForInsertion(r.begin, r.end)
}
return r.cachedLastInstrForInsertion
}
// Preds implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Preds() int { return r.sb.Preds() }
// Pred implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Pred(i int) regalloc.Block {
sb := r.sb
pred := sb.Pred(i)
l := r.f.m.SSABlockLabel(pred.ID())
index := r.f.labelToRegAllocBlockIndex[l]
return &r.f.reversePostOrderBlocks[index]
}
// Entry implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Entry() bool { return r.sb.EntryBlock() }
// Succs implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Succs() int {
return r.sb.Succs()
}
// Succ implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Succ(i int) regalloc.Block {
sb := r.sb
succ := sb.Succ(i)
if succ.ReturnBlock() {
return nil
}
l := r.f.m.SSABlockLabel(succ.ID())
index := r.f.labelToRegAllocBlockIndex[l]
return &r.f.reversePostOrderBlocks[index]
}
// LoopHeader implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LoopHeader() bool {
return r.sb.LoopHeader()
}
// LoopNestingForestChildren implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LoopNestingForestChildren() int {
r.loopNestingForestChildren = r.sb.LoopNestingForestChildren()
return len(r.loopNestingForestChildren)
}
// LoopNestingForestChild implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LoopNestingForestChild(i int) regalloc.Block {
blk := r.loopNestingForestChildren[i]
l := r.f.m.SSABlockLabel(blk.ID())
index := r.f.labelToRegAllocBlockIndex[l]
return &r.f.reversePostOrderBlocks[index]
}

View File

@ -0,0 +1,136 @@
package regalloc
import "fmt"
// These interfaces are implemented by ISA-specific backends to abstract away the details, and allow the register
// allocators to work on any ISA.
//
// TODO: the interfaces are not stabilized yet, especially x64 will need some changes. E.g. x64 has an addressing mode
// where index can be in memory. That kind of info will be useful to reduce the register pressure, and should be leveraged
// by the register allocators, like https://docs.rs/regalloc2/latest/regalloc2/enum.OperandConstraint.html
type (
// Function is the top-level interface to do register allocation, which corresponds to a CFG containing
// Blocks(s).
Function interface {
// PostOrderBlockIteratorBegin returns the first block in the post-order traversal of the CFG.
// In other words, the last blocks in the CFG will be returned first.
PostOrderBlockIteratorBegin() Block
// PostOrderBlockIteratorNext returns the next block in the post-order traversal of the CFG.
PostOrderBlockIteratorNext() Block
// ReversePostOrderBlockIteratorBegin returns the first block in the reverse post-order traversal of the CFG.
// In other words, the first blocks in the CFG will be returned first.
ReversePostOrderBlockIteratorBegin() Block
// ReversePostOrderBlockIteratorNext returns the next block in the reverse post-order traversal of the CFG.
ReversePostOrderBlockIteratorNext() Block
// ClobberedRegisters reports the registers clobbered by this function.
ClobberedRegisters([]VReg)
// LoopNestingForestRoots returns the number of roots of the loop nesting forest in a function.
LoopNestingForestRoots() int
// LoopNestingForestRoot returns the i-th root of the loop nesting forest in a function.
LoopNestingForestRoot(i int) Block
// LowestCommonAncestor returns the lowest common ancestor of two blocks in the dominator tree.
LowestCommonAncestor(blk1, blk2 Block) Block
// Idom returns the immediate dominator of the given block.
Idom(blk Block) Block
// The following methods are for rewriting the function.
// SwapBefore swaps the two virtual registers immediately before the given instruction, using tmp as scratch if needed.
SwapBefore(x1, x2, tmp VReg, instr Instr)
// StoreRegisterBefore inserts store instruction(s) before the given instruction for the given virtual register.
StoreRegisterBefore(v VReg, instr Instr)
// StoreRegisterAfter inserts store instruction(s) after the given instruction for the given virtual register.
StoreRegisterAfter(v VReg, instr Instr)
// ReloadRegisterBefore inserts reload instruction(s) before the given instruction for the given virtual register.
ReloadRegisterBefore(v VReg, instr Instr)
// ReloadRegisterAfter inserts reload instruction(s) after the given instruction for the given virtual register.
ReloadRegisterAfter(v VReg, instr Instr)
// InsertMoveBefore inserts move instruction(s) before the given instruction for the given virtual registers.
InsertMoveBefore(dst, src VReg, instr Instr)
}
// Block is a basic block in the CFG of a function, and it consists of multiple instructions, and predecessor Block(s).
Block interface {
// ID returns the unique identifier of this block which is ordered in the reverse post-order traversal of the CFG.
ID() int32
// BlockParams returns the virtual registers used as the parameters of this block.
BlockParams(*[]VReg) []VReg
// InstrIteratorBegin returns the first instruction in this block. Instructions added after lowering must be skipped.
// Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr.
InstrIteratorBegin() Instr
// InstrIteratorNext returns the next instruction in this block. Instructions added after lowering must be skipped.
// Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr.
InstrIteratorNext() Instr
// InstrRevIteratorBegin is the same as InstrIteratorBegin, but in the reverse order.
InstrRevIteratorBegin() Instr
// InstrRevIteratorNext is the same as InstrIteratorNext, but in the reverse order.
InstrRevIteratorNext() Instr
// FirstInstr returns the first instruction in this block; instructions will be inserted after it.
FirstInstr() Instr
// EndInstr returns the end instruction in this block.
EndInstr() Instr
// LastInstrForInsertion returns the last instruction in this block where instructions will be inserted before it.
// Such insertions only happen when we need to insert spill/reload instructions to adjust the merge edges.
// At the time of register allocation, all the critical edges are already split, so there is no need
// to worry about the case where a branching instruction has multiple successors.
// Therefore it is usually the nop instruction, but if the block ends with an unconditional branch, then it returns
// the branch, not the nop. In other words, it is either a nop or an unconditional branch.
LastInstrForInsertion() Instr
// Preds returns the number of predecessors of this block in the CFG.
Preds() int
// Pred returns the i-th predecessor of this block in the CFG.
Pred(i int) Block
// Entry returns true if the block is for the entry block.
Entry() bool
// Succs returns the number of successors of this block in the CFG.
Succs() int
// Succ returns the i-th successor of this block in the CFG.
Succ(i int) Block
// LoopHeader returns true if this block is a loop header.
LoopHeader() bool
// LoopNestingForestChildren returns the number of children of this block in the loop nesting forest.
LoopNestingForestChildren() int
// LoopNestingForestChild returns the i-th child of this block in the loop nesting forest.
LoopNestingForestChild(i int) Block
}
// Instr is an instruction in a block, abstracting away the underlying ISA.
Instr interface {
fmt.Stringer
// Next returns the next instruction in the same block.
Next() Instr
// Prev returns the previous instruction in the same block.
Prev() Instr
// Defs returns the virtual registers defined by this instruction.
Defs(*[]VReg) []VReg
// Uses returns the virtual registers used by this instruction.
// Note: multiple returned []VReg will not be held at the same time, so it's safe to use the same slice for this.
Uses(*[]VReg) []VReg
// AssignUse assigns the RealReg-allocated virtual register used by this instruction at the given index.
AssignUse(index int, v VReg)
// AssignDef assigns a RealReg-allocated virtual register defined by this instruction.
// This accepts only one register because we don't allocate registers for multi-def instructions (e.g. call instructions).
AssignDef(VReg)
// IsCopy returns true if this instruction is a move instruction between two registers.
// If true, the instruction is of the form of dst = src, and if the src and dst do not interfere with each other,
// we could coalesce them, and hence the copy can be eliminated from the final code.
IsCopy() bool
// IsCall returns true if this instruction is a call instruction. The result is used to insert
// caller saved register spills and restores.
IsCall() bool
// IsIndirectCall returns true if this instruction is an indirect call instruction which calls a function pointer.
// The result is used to insert caller saved register spills and restores.
IsIndirectCall() bool
// IsReturn returns true if this instruction is a return instruction.
IsReturn() bool
// AddedBeforeRegAlloc returns true if this instruction is added before register allocation.
AddedBeforeRegAlloc() bool
}
// InstrConstraint is an interface for arch-specific instruction constraints.
InstrConstraint interface {
comparable
Instr
}
)

View File

@ -0,0 +1,123 @@
package regalloc
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// VReg represents a register which is assigned to an SSA value. This is used to represent a register in the backend.
// A VReg may or may not be a physical register, and the info of physical register can be obtained by RealReg.
type VReg uint64
// VRegID is the lower 32 bits of a VReg: the pure identifier of the VReg without the RealReg info.
type VRegID uint32
// RealReg returns the RealReg of this VReg.
func (v VReg) RealReg() RealReg {
return RealReg(v >> 32)
}
// IsRealReg returns true if this VReg is backed by a physical register.
func (v VReg) IsRealReg() bool {
return v.RealReg() != RealRegInvalid
}
// FromRealReg returns a VReg from the given RealReg and RegType.
// This is used to represent a specific pre-colored register in the backend.
func FromRealReg(r RealReg, typ RegType) VReg {
rid := VRegID(r)
if rid > vRegIDReservedForRealNum {
panic(fmt.Sprintf("invalid real reg %d", r))
}
return VReg(r).SetRealReg(r).SetRegType(typ)
}
// SetRealReg sets the RealReg of this VReg and returns the updated VReg.
func (v VReg) SetRealReg(r RealReg) VReg {
return VReg(r)<<32 | (v & 0xff_00_ffffffff)
}
// RegType returns the RegType of this VReg.
func (v VReg) RegType() RegType {
return RegType(v >> 40)
}
// SetRegType sets the RegType of this VReg and returns the updated VReg.
func (v VReg) SetRegType(t RegType) VReg {
return VReg(t)<<40 | (v & 0x00_ff_ffffffff)
}
// ID returns the VRegID of this VReg.
func (v VReg) ID() VRegID {
return VRegID(v & 0xffffffff)
}
// Valid returns true if this VReg is Valid.
func (v VReg) Valid() bool {
return v.ID() != vRegIDInvalid && v.RegType() != RegTypeInvalid
}
// RealReg represents a physical register.
type RealReg byte
const RealRegInvalid RealReg = 0
const (
vRegIDInvalid VRegID = 1 << 31
VRegIDNonReservedBegin = vRegIDReservedForRealNum
vRegIDReservedForRealNum VRegID = 128
VRegInvalid = VReg(vRegIDInvalid)
)
// String implements fmt.Stringer.
func (r RealReg) String() string {
switch r {
case RealRegInvalid:
return "invalid"
default:
return fmt.Sprintf("r%d", r)
}
}
// String implements fmt.Stringer.
func (v VReg) String() string {
if v.IsRealReg() {
return fmt.Sprintf("r%d", v.ID())
}
return fmt.Sprintf("v%d?", v.ID())
}
// RegType represents the type of a register.
type RegType byte
const (
RegTypeInvalid RegType = iota
RegTypeInt
RegTypeFloat
NumRegType
)
// String implements fmt.Stringer.
func (r RegType) String() string {
switch r {
case RegTypeInt:
return "int"
case RegTypeFloat:
return "float"
default:
return "invalid"
}
}
// RegTypeOf returns the RegType of the given ssa.Type.
func RegTypeOf(p ssa.Type) RegType {
switch p {
case ssa.TypeI32, ssa.TypeI64:
return RegTypeInt
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
return RegTypeFloat
default:
panic("invalid type")
}
}
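// Illustrative sketch, not part of the original file: a VReg packs the 32-bit ID in
// the low bits, the RealReg in bits 32..39, and the RegType in bits 40..47, so a
// pre-colored register round-trips through the accessors like this (the register
// number 3 below is arbitrary).
func vregLayoutExample() (RealReg, RegType, VRegID) {
	v := FromRealReg(3, RegTypeFloat)
	return v.RealReg(), v.RegType(), v.ID() // 3, RegTypeFloat, 3
}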

File diff suppressed because it is too large

View File

@ -0,0 +1,108 @@
package regalloc
import (
"fmt"
"strings"
)
// NewRegSet returns a new RegSet with the given registers.
func NewRegSet(regs ...RealReg) RegSet {
var ret RegSet
for _, r := range regs {
ret = ret.add(r)
}
return ret
}
// RegSet represents a set of registers.
type RegSet uint64
func (rs RegSet) format(info *RegisterInfo) string { //nolint:unused
var ret []string
for i := 0; i < 64; i++ {
if rs&(1<<uint(i)) != 0 {
ret = append(ret, info.RealRegName(RealReg(i)))
}
}
return strings.Join(ret, ", ")
}
func (rs RegSet) has(r RealReg) bool {
return rs&(1<<uint(r)) != 0
}
func (rs RegSet) add(r RealReg) RegSet {
if r >= 64 {
return rs
}
return rs | 1<<uint(r)
}
func (rs RegSet) Range(f func(allocatedRealReg RealReg)) {
for i := 0; i < 64; i++ {
if rs&(1<<uint(i)) != 0 {
f(RealReg(i))
}
}
}
type regInUseSet struct {
set RegSet
vrs [64]VReg
}
func (rs *regInUseSet) reset() {
rs.set = 0
for i := range rs.vrs {
rs.vrs[i] = VRegInvalid
}
}
func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
var ret []string
for i := 0; i < 64; i++ {
if rs.set&(1<<uint(i)) != 0 {
vr := rs.vrs[i]
ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.ID()))
}
}
return strings.Join(ret, ", ")
}
func (rs *regInUseSet) has(r RealReg) bool {
if r >= 64 {
return false
}
return rs.set&(1<<uint(r)) != 0
}
func (rs *regInUseSet) get(r RealReg) VReg {
if r >= 64 {
return VRegInvalid
}
return rs.vrs[r]
}
func (rs *regInUseSet) remove(r RealReg) {
if r >= 64 {
return
}
rs.set &= ^(1 << uint(r))
rs.vrs[r] = VRegInvalid
}
func (rs *regInUseSet) add(r RealReg, vr VReg) {
if r >= 64 {
return
}
rs.set |= 1 << uint(r)
rs.vrs[r] = vr
}
func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) {
for i := 0; i < 64; i++ {
if rs.set&(1<<uint(i)) != 0 {
f(RealReg(i), rs.vrs[i])
}
}
}
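// Illustrative sketch, not part of the original file: RegSet is a 64-bit bitmap
// keyed by RealReg, so membership tests and iteration are plain bit operations.
func regSetExample() (bool, bool, int) {
	rs := NewRegSet(1, 3)
	n := 0
	rs.Range(func(RealReg) { n++ })
	return rs.has(1), rs.has(2), n // true, false, 2
}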

View File

@ -0,0 +1,43 @@
package backend
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// SSAValueDefinition represents a definition of an SSA value.
type SSAValueDefinition struct {
// BlockParamValue is valid if Instr == nil
BlockParamValue ssa.Value
// BlkParamVReg is valid if Instr == nil
BlkParamVReg regalloc.VReg
// Instr is not nil if this is a definition from an instruction.
Instr *ssa.Instruction
// N is the index of the return value in the instr's return values list.
N int
// RefCount is the number of references to the result.
RefCount int
}
func (d *SSAValueDefinition) IsFromInstr() bool {
return d.Instr != nil
}
func (d *SSAValueDefinition) IsFromBlockParam() bool {
return d.Instr == nil
}
func (d *SSAValueDefinition) SSAValue() ssa.Value {
if d.IsFromBlockParam() {
return d.BlockParamValue
} else {
r, rs := d.Instr.Returns()
if d.N == 0 {
return r
} else {
return rs[d.N-1]
}
}
}

View File

@ -0,0 +1,722 @@
package wazevo
import (
"context"
"encoding/binary"
"fmt"
"reflect"
"runtime"
"sync/atomic"
"unsafe"
"github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/experimental"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
"github.com/tetratelabs/wazero/internal/expctxkeys"
"github.com/tetratelabs/wazero/internal/internalapi"
"github.com/tetratelabs/wazero/internal/wasm"
"github.com/tetratelabs/wazero/internal/wasmdebug"
"github.com/tetratelabs/wazero/internal/wasmruntime"
)
type (
// callEngine implements api.Function.
callEngine struct {
internalapi.WazeroOnly
stack []byte
// stackTop is the pointer to the *aligned* top of the stack. This must be updated
// whenever the stack is changed. This is passed to the assembly function
// at the very beginning of api.Function Call/CallWithStack.
stackTop uintptr
// executable is the pointer to the executable code for this function.
executable *byte
preambleExecutable *byte
// parent is the *moduleEngine from which this callEngine is created.
parent *moduleEngine
// indexInModule is the index of the function in the module.
indexInModule wasm.Index
// sizeOfParamResultSlice is the size of the parameter/result slice.
sizeOfParamResultSlice int
requiredParams int
// execCtx holds various information to be read/written by assembly functions.
execCtx executionContext
// execCtxPtr holds the pointer to the executionContext which doesn't change after callEngine is created.
execCtxPtr uintptr
numberOfResults int
stackIteratorImpl stackIterator
}
// executionContext is the struct to be read/written by assembly functions.
executionContext struct {
// exitCode holds the wazevoapi.ExitCode describing the state of the function execution.
exitCode wazevoapi.ExitCode
// callerModuleContextPtr holds the moduleContextOpaque for Go function calls.
callerModuleContextPtr *byte
// originalFramePointer holds the original frame pointer of the caller of the assembly function.
originalFramePointer uintptr
// originalStackPointer holds the original stack pointer of the caller of the assembly function.
originalStackPointer uintptr
// goReturnAddress holds the return address to go back to the caller of the assembly function.
goReturnAddress uintptr
// stackBottomPtr holds the pointer to the bottom of the stack.
stackBottomPtr *byte
// goCallReturnAddress holds the return address to go back to the caller of the Go function.
goCallReturnAddress *byte
// stackPointerBeforeGoCall holds the stack pointer before calling a Go function.
stackPointerBeforeGoCall *uint64
// stackGrowRequiredSize holds the required size of stack grow.
stackGrowRequiredSize uintptr
// memoryGrowTrampolineAddress holds the address of memory grow trampoline function.
memoryGrowTrampolineAddress *byte
// stackGrowCallTrampolineAddress holds the address of stack grow trampoline function.
stackGrowCallTrampolineAddress *byte
// checkModuleExitCodeTrampolineAddress holds the address of check-module-exit-code function.
checkModuleExitCodeTrampolineAddress *byte
// savedRegisters is the opaque spaces for save/restore registers.
// We want to align 16 bytes for each register, so we use [64][2]uint64.
savedRegisters [64][2]uint64
// goFunctionCallCalleeModuleContextOpaque is the pointer to the target Go function's moduleContextOpaque.
goFunctionCallCalleeModuleContextOpaque uintptr
// tableGrowTrampolineAddress holds the address of table grow trampoline function.
tableGrowTrampolineAddress *byte
// refFuncTrampolineAddress holds the address of ref-func trampoline function.
refFuncTrampolineAddress *byte
// memmoveAddress holds the address of memmove function implemented by Go runtime. See memmove.go.
memmoveAddress uintptr
// framePointerBeforeGoCall holds the frame pointer before calling a Go function. Note: only used in amd64.
framePointerBeforeGoCall uintptr
// memoryWait32TrampolineAddress holds the address of memory_wait32 trampoline function.
memoryWait32TrampolineAddress *byte
// memoryWait64TrampolineAddress holds the address of the memory_wait64 trampoline function.
memoryWait64TrampolineAddress *byte
// memoryNotifyTrampolineAddress holds the address of the memory_notify trampoline function.
memoryNotifyTrampolineAddress *byte
}
)
func (c *callEngine) requiredInitialStackSize() int {
const initialStackSizeDefault = 10240
stackSize := initialStackSizeDefault
paramResultInBytes := c.sizeOfParamResultSlice * 8 * 2 // * 8 because uint64 is 8 bytes, and *2 because we need both separated param/result slots.
required := paramResultInBytes + 32 + 16 // 32 is enough to accommodate the call frame info, and 16 exists just in case when []byte is not aligned to 16 bytes.
if required > stackSize {
stackSize = required
}
return stackSize
}
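// Illustrative sketch, not part of the original file: for small signatures the
// default dominates; e.g. with sizeOfParamResultSlice == 4 the required space is
// 4*8*2 + 32 + 16 = 112 bytes, so the 10240-byte default is used.
func requiredInitialStackSizeExample() int {
	c := &callEngine{sizeOfParamResultSlice: 4} // hypothetical tiny signature
	return c.requiredInitialStackSize()         // 10240
}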
func (c *callEngine) init() {
stackSize := c.requiredInitialStackSize()
if wazevoapi.StackGuardCheckEnabled {
stackSize += wazevoapi.StackGuardCheckGuardPageSize
}
c.stack = make([]byte, stackSize)
c.stackTop = alignedStackTop(c.stack)
if wazevoapi.StackGuardCheckEnabled {
c.execCtx.stackBottomPtr = &c.stack[wazevoapi.StackGuardCheckGuardPageSize]
} else {
c.execCtx.stackBottomPtr = &c.stack[0]
}
c.execCtxPtr = uintptr(unsafe.Pointer(&c.execCtx))
}
// alignedStackTop returns 16-bytes aligned stack top of given stack.
// 16 bytes should be good for all platform (arm64/amd64).
func alignedStackTop(s []byte) uintptr {
stackAddr := uintptr(unsafe.Pointer(&s[len(s)-1]))
return stackAddr - (stackAddr & (16 - 1))
}
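// Illustrative sketch, not part of the original file: the alignment above simply
// rounds an address down to a multiple of 16 by clearing its low four bits.
func alignDown16(addr uintptr) uintptr {
	return addr - (addr & 15) // e.g. 0x1007 -> 0x1000, 0x1010 stays 0x1010
}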
// Definition implements api.Function.
func (c *callEngine) Definition() api.FunctionDefinition {
return c.parent.module.Source.FunctionDefinition(c.indexInModule)
}
// Call implements api.Function.
func (c *callEngine) Call(ctx context.Context, params ...uint64) ([]uint64, error) {
if c.requiredParams != len(params) {
return nil, fmt.Errorf("expected %d params, but passed %d", c.requiredParams, len(params))
}
paramResultSlice := make([]uint64, c.sizeOfParamResultSlice)
copy(paramResultSlice, params)
if err := c.callWithStack(ctx, paramResultSlice); err != nil {
return nil, err
}
return paramResultSlice[:c.numberOfResults], nil
}
func (c *callEngine) addFrame(builder wasmdebug.ErrorBuilder, addr uintptr) (def api.FunctionDefinition, listener experimental.FunctionListener) {
eng := c.parent.parent.parent
cm := eng.compiledModuleOfAddr(addr)
if cm == nil {
// In this case, the module might have been closed and deleted from the engine.
// We fall back to searching the imported modules that can be referenced from this callEngine.
// First, we check the callEngine's own module.
if checkAddrInBytes(addr, c.parent.parent.executable) {
cm = c.parent.parent
} else {
// Otherwise, search all imported modules. TODO: maybe recursive, but not sure it's useful in practice.
p := c.parent
for i := range p.importedFunctions {
candidate := p.importedFunctions[i].me.parent
if checkAddrInBytes(addr, candidate.executable) {
cm = candidate
break
}
}
}
}
if cm != nil {
index := cm.functionIndexOf(addr)
def = cm.module.FunctionDefinition(cm.module.ImportFunctionCount + index)
var sources []string
if dw := cm.module.DWARFLines; dw != nil {
sourceOffset := cm.getSourceOffset(addr)
sources = dw.Line(sourceOffset)
}
builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources)
if len(cm.listeners) > 0 {
listener = cm.listeners[index]
}
}
return
}
// CallWithStack implements api.Function.
func (c *callEngine) CallWithStack(ctx context.Context, paramResultStack []uint64) (err error) {
if c.sizeOfParamResultSlice > len(paramResultStack) {
return fmt.Errorf("need %d params, but stack size is %d", c.sizeOfParamResultSlice, len(paramResultStack))
}
return c.callWithStack(ctx, paramResultStack)
}
// CallWithStack implements api.Function.
func (c *callEngine) callWithStack(ctx context.Context, paramResultStack []uint64) (err error) {
snapshotEnabled := ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil
if snapshotEnabled {
ctx = context.WithValue(ctx, expctxkeys.SnapshotterKey{}, c)
}
if wazevoapi.StackGuardCheckEnabled {
defer func() {
wazevoapi.CheckStackGuardPage(c.stack)
}()
}
p := c.parent
ensureTermination := p.parent.ensureTermination
m := p.module
if ensureTermination {
select {
case <-ctx.Done():
// If the provided context is already done, close the module and return the error.
m.CloseWithCtxErr(ctx)
return m.FailIfClosed()
default:
}
}
var paramResultPtr *uint64
if len(paramResultStack) > 0 {
paramResultPtr = &paramResultStack[0]
}
defer func() {
r := recover()
if s, ok := r.(*snapshot); ok {
// A snapshot that wasn't handled here was created by a different call engine, possibly from a nested wasm invocation;
// let it propagate up to be handled by the caller.
panic(s)
}
if r != nil {
type listenerForAbort struct {
def api.FunctionDefinition
lsn experimental.FunctionListener
}
var listeners []listenerForAbort
builder := wasmdebug.NewErrorBuilder()
def, lsn := c.addFrame(builder, uintptr(unsafe.Pointer(c.execCtx.goCallReturnAddress)))
if lsn != nil {
listeners = append(listeners, listenerForAbort{def, lsn})
}
returnAddrs := unwindStack(
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)),
c.execCtx.framePointerBeforeGoCall,
c.stackTop,
nil,
)
for _, retAddr := range returnAddrs[:len(returnAddrs)-1] { // the last return addr is the trampoline, so we skip it.
def, lsn = c.addFrame(builder, retAddr)
if lsn != nil {
listeners = append(listeners, listenerForAbort{def, lsn})
}
}
err = builder.FromRecovered(r)
for _, lsn := range listeners {
lsn.lsn.Abort(ctx, m, lsn.def, err)
}
} else {
if err != wasmruntime.ErrRuntimeStackOverflow { // The stack overflow case isn't raised via panic (to avoid extreme stack unwinding).
err = c.parent.module.FailIfClosed()
}
}
if err != nil {
// Ensures that we can reuse this callEngine even after an error.
c.execCtx.exitCode = wazevoapi.ExitCodeOK
}
}()
if ensureTermination {
done := m.CloseModuleOnCanceledOrTimeout(ctx)
defer done()
}
if c.stackTop&(16-1) != 0 {
panic("BUG: stack must be aligned to 16 bytes")
}
entrypoint(c.preambleExecutable, c.executable, c.execCtxPtr, c.parent.opaquePtr, paramResultPtr, c.stackTop)
for {
switch ec := c.execCtx.exitCode; ec & wazevoapi.ExitCodeMask {
case wazevoapi.ExitCodeOK:
return nil
case wazevoapi.ExitCodeGrowStack:
oldsp := uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall))
oldTop := c.stackTop
oldStack := c.stack
var newsp, newfp uintptr
if wazevoapi.StackGuardCheckEnabled {
newsp, newfp, err = c.growStackWithGuarded()
} else {
newsp, newfp, err = c.growStack()
}
if err != nil {
return err
}
adjustClonedStack(oldsp, oldTop, newsp, newfp, c.stackTop)
// Old stack must be alive until the new stack is adjusted.
runtime.KeepAlive(oldStack)
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, newsp, newfp)
case wazevoapi.ExitCodeGrowMemory:
mod := c.callerModuleInstance()
mem := mod.MemoryInstance
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
argRes := &s[0]
if res, ok := mem.Grow(uint32(*argRes)); !ok {
*argRes = uint64(0xffffffff) // = -1 in signed 32-bit integer.
} else {
*argRes = uint64(res)
calleeOpaque := opaqueViewFromPtr(uintptr(unsafe.Pointer(c.execCtx.callerModuleContextPtr)))
if mod.Source.MemorySection != nil { // Local memory.
putLocalMemory(calleeOpaque, 8 /* local memory begins at 8 */, mem)
} else {
// Imported memory's owner at offset 16 of the callerModuleContextPtr.
opaquePtr := uintptr(binary.LittleEndian.Uint64(calleeOpaque[16:]))
importedMemOwner := opaqueViewFromPtr(opaquePtr)
putLocalMemory(importedMemOwner, 8 /* local memory begins at 8 */, mem)
}
}
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeTableGrow:
mod := c.callerModuleInstance()
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
tableIndex, num, ref := uint32(s[0]), uint32(s[1]), uintptr(s[2])
table := mod.Tables[tableIndex]
s[0] = uint64(uint32(int32(table.Grow(num, ref))))
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCallGoFunction:
index := wazevoapi.GoFunctionIndexFromExitCode(ec)
f := hostModuleGoFuncFromOpaque[api.GoFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque)
func() {
if snapshotEnabled {
defer snapshotRecoverFn(c)
}
f.Call(ctx, goCallStackView(c.execCtx.stackPointerBeforeGoCall))
}()
// Back to the native code.
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCallGoFunctionWithListener:
index := wazevoapi.GoFunctionIndexFromExitCode(ec)
f := hostModuleGoFuncFromOpaque[api.GoFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque)
listeners := hostModuleListenersSliceFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque)
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
// Call Listener.Before.
callerModule := c.callerModuleInstance()
listener := listeners[index]
hostModule := hostModuleFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque)
def := hostModule.FunctionDefinition(wasm.Index(index))
listener.Before(ctx, callerModule, def, s, c.stackIterator(true))
// Call into the Go function.
func() {
if snapshotEnabled {
defer snapshotRecoverFn(c)
}
f.Call(ctx, s)
}()
// Call Listener.After.
listener.After(ctx, callerModule, def, s)
// Back to the native code.
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCallGoModuleFunction:
index := wazevoapi.GoFunctionIndexFromExitCode(ec)
f := hostModuleGoFuncFromOpaque[api.GoModuleFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque)
mod := c.callerModuleInstance()
func() {
if snapshotEnabled {
defer snapshotRecoverFn(c)
}
f.Call(ctx, mod, goCallStackView(c.execCtx.stackPointerBeforeGoCall))
}()
// Back to the native code.
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCallGoModuleFunctionWithListener:
index := wazevoapi.GoFunctionIndexFromExitCode(ec)
f := hostModuleGoFuncFromOpaque[api.GoModuleFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque)
listeners := hostModuleListenersSliceFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque)
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
// Call Listener.Before.
callerModule := c.callerModuleInstance()
listener := listeners[index]
hostModule := hostModuleFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque)
def := hostModule.FunctionDefinition(wasm.Index(index))
listener.Before(ctx, callerModule, def, s, c.stackIterator(true))
// Call into the Go function.
func() {
if snapshotEnabled {
defer snapshotRecoverFn(c)
}
f.Call(ctx, callerModule, s)
}()
// Call Listener.After.
listener.After(ctx, callerModule, def, s)
// Back to the native code.
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCallListenerBefore:
stack := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
index := wasm.Index(stack[0])
mod := c.callerModuleInstance()
listener := mod.Engine.(*moduleEngine).listeners[index]
def := mod.Source.FunctionDefinition(index + mod.Source.ImportFunctionCount)
listener.Before(ctx, mod, def, stack[1:], c.stackIterator(false))
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCallListenerAfter:
stack := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
index := wasm.Index(stack[0])
mod := c.callerModuleInstance()
listener := mod.Engine.(*moduleEngine).listeners[index]
def := mod.Source.FunctionDefinition(index + mod.Source.ImportFunctionCount)
listener.After(ctx, mod, def, stack[1:])
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCheckModuleExitCode:
// Note: this operation must be done in Go, not native code. The reason is that
// native code cannot be preempted and that means it can block forever if there are not
// enough OS threads (which we don't have control over).
if err := m.FailIfClosed(); err != nil {
panic(err)
}
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeRefFunc:
mod := c.callerModuleInstance()
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
funcIndex := wasm.Index(s[0])
ref := mod.Engine.FunctionInstanceReference(funcIndex)
s[0] = uint64(ref)
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeMemoryWait32:
mod := c.callerModuleInstance()
mem := mod.MemoryInstance
if !mem.Shared {
panic(wasmruntime.ErrRuntimeExpectedSharedMemory)
}
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
timeout, exp, addr := int64(s[0]), uint32(s[1]), uintptr(s[2])
base := uintptr(unsafe.Pointer(&mem.Buffer[0]))
offset := uint32(addr - base)
res := mem.Wait32(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint32 {
addr := unsafe.Add(unsafe.Pointer(&mem.Buffer[0]), offset)
return atomic.LoadUint32((*uint32)(addr))
})
s[0] = res
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeMemoryWait64:
mod := c.callerModuleInstance()
mem := mod.MemoryInstance
if !mem.Shared {
panic(wasmruntime.ErrRuntimeExpectedSharedMemory)
}
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
timeout, exp, addr := int64(s[0]), uint64(s[1]), uintptr(s[2])
base := uintptr(unsafe.Pointer(&mem.Buffer[0]))
offset := uint32(addr - base)
res := mem.Wait64(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint64 {
addr := unsafe.Add(unsafe.Pointer(&mem.Buffer[0]), offset)
return atomic.LoadUint64((*uint64)(addr))
})
s[0] = uint64(res)
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeMemoryNotify:
mod := c.callerModuleInstance()
mem := mod.MemoryInstance
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
count, addr := uint32(s[0]), s[1]
offset := uint32(uintptr(addr) - uintptr(unsafe.Pointer(&mem.Buffer[0])))
res := mem.Notify(offset, count)
s[0] = uint64(res)
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeUnreachable:
panic(wasmruntime.ErrRuntimeUnreachable)
case wazevoapi.ExitCodeMemoryOutOfBounds:
panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
case wazevoapi.ExitCodeTableOutOfBounds:
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
case wazevoapi.ExitCodeIndirectCallNullPointer:
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
case wazevoapi.ExitCodeIndirectCallTypeMismatch:
panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
case wazevoapi.ExitCodeIntegerOverflow:
panic(wasmruntime.ErrRuntimeIntegerOverflow)
case wazevoapi.ExitCodeIntegerDivisionByZero:
panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
case wazevoapi.ExitCodeInvalidConversionToInteger:
panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
case wazevoapi.ExitCodeUnalignedAtomic:
panic(wasmruntime.ErrRuntimeUnalignedAtomic)
default:
panic("BUG")
}
}
}
func (c *callEngine) callerModuleInstance() *wasm.ModuleInstance {
return moduleInstanceFromOpaquePtr(c.execCtx.callerModuleContextPtr)
}
func opaqueViewFromPtr(ptr uintptr) []byte {
var opaque []byte
sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaque))
sh.Data = ptr
setSliceLimits(sh, 24, 24)
return opaque
}
const callStackCeiling = uintptr(50000000) // in uint64 (8 bytes) == 400000000 bytes in total == 400mb.
func (c *callEngine) growStackWithGuarded() (newSP uintptr, newFP uintptr, err error) {
if wazevoapi.StackGuardCheckEnabled {
wazevoapi.CheckStackGuardPage(c.stack)
}
newSP, newFP, err = c.growStack()
if err != nil {
return
}
if wazevoapi.StackGuardCheckEnabled {
c.execCtx.stackBottomPtr = &c.stack[wazevoapi.StackGuardCheckGuardPageSize]
}
return
}
// growStack grows the stack, and returns the new stack pointer and frame pointer.
func (c *callEngine) growStack() (newSP, newFP uintptr, err error) {
currentLen := uintptr(len(c.stack))
if callStackCeiling < currentLen {
err = wasmruntime.ErrRuntimeStackOverflow
return
}
newLen := 2*currentLen + c.execCtx.stackGrowRequiredSize + 16 // Stack might be aligned to 16 bytes, so add 16 bytes just in case.
newSP, newFP, c.stackTop, c.stack = c.cloneStack(newLen)
c.execCtx.stackBottomPtr = &c.stack[0]
return
}
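// Sizing sketch with illustrative numbers: growing a 1 MiB stack when the pending frame needs
// another 4 KiB yields newLen = 2*1MiB + 4KiB + 16. Doubling keeps repeated growth amortized,
// the required size guarantees the pending frame fits, and the spare 16 bytes absorb the
// re-alignment of the new stack top.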
func (c *callEngine) cloneStack(l uintptr) (newSP, newFP, newTop uintptr, newStack []byte) {
newStack = make([]byte, l)
relSp := c.stackTop - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall))
relFp := c.stackTop - c.execCtx.framePointerBeforeGoCall
// Copy the existing contents in the previous Go-allocated stack into the new one.
var prevStackAligned, newStackAligned []byte
{
sh := (*reflect.SliceHeader)(unsafe.Pointer(&prevStackAligned))
sh.Data = c.stackTop - relSp
setSliceLimits(sh, relSp, relSp)
}
newTop = alignedStackTop(newStack)
{
newSP = newTop - relSp
newFP = newTop - relFp
sh := (*reflect.SliceHeader)(unsafe.Pointer(&newStackAligned))
sh.Data = newSP
setSliceLimits(sh, relSp, relSp)
}
copy(newStackAligned, prevStackAligned)
return
}
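// Relocation sketch with illustrative addresses: SP and FP are preserved relative to the aligned
// stack top. If the old top is 0x9000 and the old SP is 0x8f80 (relSp = 0x80), and the new aligned
// top happens to be 0x5000, the cloned SP becomes 0x5000 - 0x80 = 0x4f80, and exactly relSp bytes
// (the live region between the old SP and the old top) are copied into the new stack.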
func (c *callEngine) stackIterator(onHostCall bool) experimental.StackIterator {
c.stackIteratorImpl.reset(c, onHostCall)
return &c.stackIteratorImpl
}
// stackIterator implements experimental.StackIterator.
type stackIterator struct {
retAddrs []uintptr
retAddrCursor int
eng *engine
pc uint64
currentDef *wasm.FunctionDefinition
}
func (si *stackIterator) reset(c *callEngine, onHostCall bool) {
if onHostCall {
si.retAddrs = append(si.retAddrs[:0], uintptr(unsafe.Pointer(c.execCtx.goCallReturnAddress)))
} else {
si.retAddrs = si.retAddrs[:0]
}
si.retAddrs = unwindStack(uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall, c.stackTop, si.retAddrs)
si.retAddrs = si.retAddrs[:len(si.retAddrs)-1] // the last return addr is the trampoline, so we skip it.
si.retAddrCursor = 0
si.eng = c.parent.parent.parent
}
// Next implements the same method as documented on experimental.StackIterator.
func (si *stackIterator) Next() bool {
if si.retAddrCursor >= len(si.retAddrs) {
return false
}
addr := si.retAddrs[si.retAddrCursor]
cm := si.eng.compiledModuleOfAddr(addr)
if cm != nil {
index := cm.functionIndexOf(addr)
def := cm.module.FunctionDefinition(cm.module.ImportFunctionCount + index)
si.currentDef = def
si.retAddrCursor++
si.pc = uint64(addr)
return true
}
return false
}
// ProgramCounter implements the same method as documented on experimental.StackIterator.
func (si *stackIterator) ProgramCounter() experimental.ProgramCounter {
return experimental.ProgramCounter(si.pc)
}
// Function implements the same method as documented on experimental.StackIterator.
func (si *stackIterator) Function() experimental.InternalFunction {
return si
}
// Definition implements the same method as documented on experimental.InternalFunction.
func (si *stackIterator) Definition() api.FunctionDefinition {
return si.currentDef
}
// SourceOffsetForPC implements the same method as documented on experimental.InternalFunction.
func (si *stackIterator) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 {
upc := uintptr(pc)
cm := si.eng.compiledModuleOfAddr(upc)
return cm.getSourceOffset(upc)
}
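// Usage sketch (illustrative): an experimental.FunctionListener's Before hook receives this
// iterator and can walk the frames, e.g.
//
//	for si.Next() {
//		fn := si.Function()
//		fmt.Println(fn.Definition().DebugName(), fn.SourceOffsetForPC(si.ProgramCounter()))
//	}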
// snapshot implements experimental.Snapshot
type snapshot struct {
sp, fp, top uintptr
returnAddress *byte
stack []byte
savedRegisters [64][2]uint64
ret []uint64
c *callEngine
}
// Snapshot implements the same method as documented on experimental.Snapshotter.
func (c *callEngine) Snapshot() experimental.Snapshot {
returnAddress := c.execCtx.goCallReturnAddress
oldTop, oldSp := c.stackTop, uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall))
newSP, newFP, newTop, newStack := c.cloneStack(uintptr(len(c.stack)) + 16)
adjustClonedStack(oldSp, oldTop, newSP, newFP, newTop)
return &snapshot{
sp: newSP,
fp: newFP,
top: newTop,
savedRegisters: c.execCtx.savedRegisters,
returnAddress: returnAddress,
stack: newStack,
c: c,
}
}
// Restore implements the same method as documented on experimental.Snapshot.
func (s *snapshot) Restore(ret []uint64) {
s.ret = ret
panic(s)
}
func (s *snapshot) doRestore() {
spp := *(**uint64)(unsafe.Pointer(&s.sp))
view := goCallStackView(spp)
copy(view, s.ret)
c := s.c
c.stack = s.stack
c.stackTop = s.top
ec := &c.execCtx
ec.stackBottomPtr = &c.stack[0]
ec.stackPointerBeforeGoCall = spp
ec.framePointerBeforeGoCall = s.fp
ec.goCallReturnAddress = s.returnAddress
ec.savedRegisters = s.savedRegisters
}
// Error implements the same method on error.
func (s *snapshot) Error() string {
return "unhandled snapshot restore, this generally indicates restore was called from a different " +
"exported function invocation than snapshot"
}
func snapshotRecoverFn(c *callEngine) {
if r := recover(); r != nil {
if s, ok := r.(*snapshot); ok && s.c == c {
s.doRestore()
} else {
panic(r)
}
}
}
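// Restore/recover flow sketch (derived from the code above): a host function obtains the
// callEngine as an experimental.Snapshotter (how it is exposed, e.g. via the context, is outside
// this file) and calls Snapshot, which clones the current stack. Restore(ret) never returns
// normally; it panics with the *snapshot, the deferred snapshotRecoverFn in the Go-call exit
// cases recovers it, and doRestore rewinds the engine to the cloned stack and saved registers and
// copies ret into the stack view, so execution resumes in native code at the snapshotted return
// address.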

View File

@ -0,0 +1,843 @@
package wazevo
import (
"context"
"encoding/hex"
"errors"
"fmt"
"runtime"
"sort"
"sync"
"unsafe"
"github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/experimental"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/frontend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
"github.com/tetratelabs/wazero/internal/filecache"
"github.com/tetratelabs/wazero/internal/platform"
"github.com/tetratelabs/wazero/internal/version"
"github.com/tetratelabs/wazero/internal/wasm"
)
type (
// engine implements wasm.Engine.
engine struct {
wazeroVersion string
fileCache filecache.Cache
compiledModules map[wasm.ModuleID]*compiledModule
// sortedCompiledModules is a list of compiled modules sorted by the initial address of the executable.
sortedCompiledModules []*compiledModule
mux sync.RWMutex
// sharedFunctions is compiled functions shared by all modules.
sharedFunctions *sharedFunctions
// setFinalizer defaults to runtime.SetFinalizer, but is overridable for tests.
setFinalizer func(obj interface{}, finalizer interface{})
// The following fields are reused when compiling the shared functions.
machine backend.Machine
be backend.Compiler
}
sharedFunctions struct {
// memoryGrowExecutable is a compiled trampoline executable for memory.grow builtin function.
memoryGrowExecutable []byte
// checkModuleExitCode is a compiled trampoline executable for checking module instance exit code. This
// is used when ensureTermination is true.
checkModuleExitCode []byte
// stackGrowExecutable is a compiled executable for growing stack builtin function.
stackGrowExecutable []byte
// tableGrowExecutable is a compiled trampoline executable for table.grow builtin function.
tableGrowExecutable []byte
// refFuncExecutable is a compiled trampoline executable for ref.func builtin function.
refFuncExecutable []byte
// memoryWait32Executable is a compiled trampoline executable for memory.wait32 builtin function
memoryWait32Executable []byte
// memoryWait64Executable is a compiled trampoline executable for memory.wait64 builtin function
memoryWait64Executable []byte
// memoryNotifyExecutable is a compiled trampoline executable for memory.notify builtin function
memoryNotifyExecutable []byte
listenerBeforeTrampolines map[*wasm.FunctionType][]byte
listenerAfterTrampolines map[*wasm.FunctionType][]byte
}
// compiledModule is a compiled variant of a wasm.Module and ready to be used for instantiation.
compiledModule struct {
*executables
// functionOffsets maps a local function index to the offset in the executable.
functionOffsets []int
parent *engine
module *wasm.Module
ensureTermination bool
listeners []experimental.FunctionListener
listenerBeforeTrampolines []*byte
listenerAfterTrampolines []*byte
// The following fields are only available for non-host modules.
offsets wazevoapi.ModuleContextOffsetData
sharedFunctions *sharedFunctions
sourceMap sourceMap
}
executables struct {
executable []byte
entryPreambles [][]byte
}
)
// sourceMap is a mapping from the offset of the executable to the offset of the original wasm binary.
type sourceMap struct {
// executableOffsets is a sorted list of offsets into the executable. It is index-correlated with wasmBinaryOffsets;
// in other words, executableOffsets[i] is the offset in the executable that corresponds to the Wasm binary offset
// pointed to by wasmBinaryOffsets[i].
executableOffsets []uintptr
// wasmBinaryOffsets is the counterpart of executableOffsets.
wasmBinaryOffsets []uint64
}
var _ wasm.Engine = (*engine)(nil)
// NewEngine returns the implementation of wasm.Engine.
func NewEngine(ctx context.Context, _ api.CoreFeatures, fc filecache.Cache) wasm.Engine {
machine := newMachine()
be := backend.NewCompiler(ctx, machine, ssa.NewBuilder())
e := &engine{
compiledModules: make(map[wasm.ModuleID]*compiledModule),
setFinalizer: runtime.SetFinalizer,
machine: machine,
be: be,
fileCache: fc,
wazeroVersion: version.GetWazeroVersion(),
}
e.compileSharedFunctions()
return e
}
// CompileModule implements wasm.Engine.
func (e *engine) CompileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (err error) {
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.Lock()
defer wazevoapi.PerfMap.Unlock()
}
if _, ok, err := e.getCompiledModule(module, listeners, ensureTermination); ok { // cache hit!
return nil
} else if err != nil {
return err
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
ctx = wazevoapi.NewDeterministicCompilationVerifierContext(ctx, len(module.CodeSection))
}
cm, err := e.compileModule(ctx, module, listeners, ensureTermination)
if err != nil {
return err
}
if err = e.addCompiledModule(module, cm); err != nil {
return err
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
for i := 0; i < wazevoapi.DeterministicCompilationVerifyingIter; i++ {
_, err := e.compileModule(ctx, module, listeners, ensureTermination)
if err != nil {
return err
}
}
}
if len(listeners) > 0 {
cm.listeners = listeners
cm.listenerBeforeTrampolines = make([]*byte, len(module.TypeSection))
cm.listenerAfterTrampolines = make([]*byte, len(module.TypeSection))
for i := range module.TypeSection {
typ := &module.TypeSection[i]
before, after := e.getListenerTrampolineForType(typ)
cm.listenerBeforeTrampolines[i] = before
cm.listenerAfterTrampolines[i] = after
}
}
return nil
}
func (exec *executables) compileEntryPreambles(m *wasm.Module, machine backend.Machine, be backend.Compiler) {
exec.entryPreambles = make([][]byte, len(m.TypeSection))
for i := range m.TypeSection {
typ := &m.TypeSection[i]
sig := frontend.SignatureForWasmFunctionType(typ)
be.Init()
buf := machine.CompileEntryPreamble(&sig)
executable := mmapExecutable(buf)
exec.entryPreambles[i] = executable
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&executable[0])),
uint64(len(executable)), fmt.Sprintf("entry_preamble::type=%s", typ.String()))
}
}
}
func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (*compiledModule, error) {
withListener := len(listeners) > 0
cm := &compiledModule{
offsets: wazevoapi.NewModuleContextOffsetData(module, withListener), parent: e, module: module,
ensureTermination: ensureTermination,
executables: &executables{},
}
if module.IsHostModule {
return e.compileHostModule(ctx, module, listeners)
}
importedFns, localFns := int(module.ImportFunctionCount), len(module.FunctionSection)
if localFns == 0 {
return cm, nil
}
rels := make([]backend.RelocationInfo, 0)
refToBinaryOffset := make([]int, importedFns+localFns)
if wazevoapi.DeterministicCompilationVerifierEnabled {
// The compilation must be deterministic regardless of the order of functions being compiled.
wazevoapi.DeterministicCompilationVerifierRandomizeIndexes(ctx)
}
needSourceInfo := module.DWARFLines != nil
// Create new compiler instances, which are reused across all functions in this module.
ssaBuilder := ssa.NewBuilder()
fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo)
machine := newMachine()
be := backend.NewCompiler(ctx, machine, ssaBuilder)
cm.executables.compileEntryPreambles(module, machine, be)
totalSize := 0 // Total binary size of the executable.
cm.functionOffsets = make([]int, localFns)
bodies := make([][]byte, localFns)
// Trampoline relocation related variables.
trampolineInterval, callTrampolineIslandSize, err := machine.CallTrampolineIslandInfo(localFns)
if err != nil {
return nil, err
}
needCallTrampoline := callTrampolineIslandSize > 0
var callTrampolineIslandOffsets []int // Holds the offsets of trampoline islands.
for i := range module.CodeSection {
if wazevoapi.DeterministicCompilationVerifierEnabled {
i = wazevoapi.DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx, i)
}
fidx := wasm.Index(i + importedFns)
if wazevoapi.NeedFunctionNameInContext {
def := module.FunctionDefinition(fidx)
name := def.DebugName()
if len(def.ExportNames()) > 0 {
name = def.ExportNames()[0]
}
ctx = wazevoapi.SetCurrentFunctionName(ctx, i, fmt.Sprintf("[%d/%d]%s", i, len(module.CodeSection)-1, name))
}
needListener := len(listeners) > 0 && listeners[i] != nil
body, relsPerFunc, err := e.compileLocalWasmFunction(ctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener)
if err != nil {
return nil, fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err)
}
// Align 16-bytes boundary.
totalSize = (totalSize + 15) &^ 15
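// Illustrative arithmetic: (totalSize + 15) &^ 15 rounds totalSize up to the next multiple of 16,
// e.g. 35 becomes 48 while 48 stays 48.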
cm.functionOffsets[i] = totalSize
if needSourceInfo {
// At the beginning of the function, we record the offset of the function body so that
// we can resolve the source location of the call site of the before-listener call.
cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize))
cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, module.CodeSection[i].BodyOffsetInCodeSection)
for _, info := range be.SourceOffsetInfo() {
cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize)+uintptr(info.ExecutableOffset))
cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, uint64(info.SourceOffset))
}
}
fref := frontend.FunctionIndexToFuncRef(fidx)
refToBinaryOffset[fref] = totalSize
// At this point, relocation offsets are relative to the start of the function body,
// so we adjust them to be relative to the start of the executable.
for _, r := range relsPerFunc {
r.Offset += int64(totalSize)
rels = append(rels, r)
}
bodies[i] = body
totalSize += len(body)
if wazevoapi.PrintMachineCodeHexPerFunction {
fmt.Printf("[[[machine code for %s]]]\n%s\n\n", wazevoapi.GetCurrentFunctionName(ctx), hex.EncodeToString(body))
}
if needCallTrampoline {
// If the total size exceeds the trampoline interval, we need to add a trampoline island.
if totalSize/trampolineInterval > len(callTrampolineIslandOffsets) {
callTrampolineIslandOffsets = append(callTrampolineIslandOffsets, totalSize)
totalSize += callTrampolineIslandSize
}
}
}
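// Island placement sketch with an illustrative interval: if trampolineInterval were 16 MiB, the
// loop above would append one island the first time totalSize crosses 16 MiB, another at 32 MiB,
// and so on, so that every near call in the preceding chunk stays within the architecture's
// limited direct-branch range of some trampoline.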
// Allocate executable memory and then copy the generated machine code.
executable, err := platform.MmapCodeSegment(totalSize)
if err != nil {
panic(err)
}
cm.executable = executable
for i, b := range bodies {
offset := cm.functionOffsets[i]
copy(executable[offset:], b)
}
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets)
}
if needSourceInfo {
for i := range cm.sourceMap.executableOffsets {
cm.sourceMap.executableOffsets[i] += uintptr(unsafe.Pointer(&cm.executable[0]))
}
}
// Resolve relocations for local function calls.
if len(rels) > 0 {
machine.ResolveRelocations(refToBinaryOffset, executable, rels, callTrampolineIslandOffsets)
}
if runtime.GOARCH == "arm64" {
// On arm64, we cannot give all of rwx at the same time, so we change it to exec.
if err = platform.MprotectRX(executable); err != nil {
return nil, err
}
}
cm.sharedFunctions = e.sharedFunctions
e.setFinalizer(cm.executables, executablesFinalizer)
return cm, nil
}
func (e *engine) compileLocalWasmFunction(
ctx context.Context,
module *wasm.Module,
localFunctionIndex wasm.Index,
fe *frontend.Compiler,
ssaBuilder ssa.Builder,
be backend.Compiler,
needListener bool,
) (body []byte, rels []backend.RelocationInfo, err error) {
typIndex := module.FunctionSection[localFunctionIndex]
typ := &module.TypeSection[typIndex]
codeSeg := &module.CodeSection[localFunctionIndex]
// Initializes both frontend and backend compilers.
fe.Init(localFunctionIndex, typIndex, typ, codeSeg.LocalTypes, codeSeg.Body, needListener, codeSeg.BodyOffsetInCodeSection)
be.Init()
// Lower Wasm to SSA.
fe.LowerToSSA()
if wazevoapi.PrintSSA && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[SSA for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), ssaBuilder.Format())
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "SSA", ssaBuilder.Format())
}
// Run SSA-level optimization passes.
ssaBuilder.RunPasses()
if wazevoapi.PrintOptimizedSSA && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[Optimized SSA for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), ssaBuilder.Format())
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "Optimized SSA", ssaBuilder.Format())
}
// Now our ssaBuilder contains the necessary information to further lower them to
// machine code.
original, rels, err := be.Compile(ctx)
if err != nil {
return nil, nil, fmt.Errorf("ssa->machine code: %v", err)
}
// TODO: optimize as zero copy.
copied := make([]byte, len(original))
copy(copied, original)
return copied, rels, nil
}
func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener) (*compiledModule, error) {
machine := newMachine()
be := backend.NewCompiler(ctx, machine, ssa.NewBuilder())
num := len(module.CodeSection)
cm := &compiledModule{module: module, listeners: listeners, executables: &executables{}}
cm.functionOffsets = make([]int, num)
totalSize := 0 // Total binary size of the executable.
bodies := make([][]byte, num)
var sig ssa.Signature
for i := range module.CodeSection {
totalSize = (totalSize + 15) &^ 15
cm.functionOffsets[i] = totalSize
typIndex := module.FunctionSection[i]
typ := &module.TypeSection[typIndex]
// The limit could be relaxed as long as the index still fits into the ExitCode encoding used by
// wazevoapi.ExitCodeCallGoModuleFunctionWithIndex. However, 1 << 16 should be large enough for any real use case.
const hostFunctionNumMaximum = 1 << 16
if i >= hostFunctionNumMaximum {
return nil, fmt.Errorf("too many host functions (maximum %d)", hostFunctionNumMaximum)
}
sig.ID = ssa.SignatureID(typIndex) // This is important since we reuse the `machine` which caches the ABI based on the SignatureID.
sig.Params = append(sig.Params[:0],
ssa.TypeI64, // First argument must be exec context.
ssa.TypeI64, // The second argument is the moduleContextOpaque of this host module.
)
for _, t := range typ.Params {
sig.Params = append(sig.Params, frontend.WasmTypeToSSAType(t))
}
sig.Results = sig.Results[:0]
for _, t := range typ.Results {
sig.Results = append(sig.Results, frontend.WasmTypeToSSAType(t))
}
c := &module.CodeSection[i]
if c.GoFunc == nil {
panic("BUG: GoFunc must be set for host module")
}
withListener := len(listeners) > 0 && listeners[i] != nil
var exitCode wazevoapi.ExitCode
fn := c.GoFunc
switch fn.(type) {
case api.GoModuleFunction:
exitCode = wazevoapi.ExitCodeCallGoModuleFunctionWithIndex(i, withListener)
case api.GoFunction:
exitCode = wazevoapi.ExitCodeCallGoFunctionWithIndex(i, withListener)
}
be.Init()
machine.CompileGoFunctionTrampoline(exitCode, &sig, true)
if err := be.Finalize(ctx); err != nil {
return nil, err
}
body := be.Buf()
if wazevoapi.PerfMapEnabled {
name := module.FunctionDefinition(wasm.Index(i)).DebugName()
wazevoapi.PerfMap.AddModuleEntry(i,
int64(totalSize),
uint64(len(body)),
fmt.Sprintf("trampoline:%s", name))
}
// TODO: optimize as zero copy.
copied := make([]byte, len(body))
copy(copied, body)
bodies[i] = copied
totalSize += len(body)
}
if totalSize == 0 {
// Empty module.
return cm, nil
}
// Allocate executable memory and then copy the generated machine code.
executable, err := platform.MmapCodeSegment(totalSize)
if err != nil {
panic(err)
}
cm.executable = executable
for i, b := range bodies {
offset := cm.functionOffsets[i]
copy(executable[offset:], b)
}
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets)
}
if runtime.GOARCH == "arm64" {
// On arm64, we cannot give all of rwx at the same time, so we change it to exec.
if err = platform.MprotectRX(executable); err != nil {
return nil, err
}
}
e.setFinalizer(cm.executables, executablesFinalizer)
return cm, nil
}
// Close implements wasm.Engine.
func (e *engine) Close() (err error) {
e.mux.Lock()
defer e.mux.Unlock()
e.sortedCompiledModules = nil
e.compiledModules = nil
e.sharedFunctions = nil
return nil
}
// CompiledModuleCount implements wasm.Engine.
func (e *engine) CompiledModuleCount() uint32 {
e.mux.RLock()
defer e.mux.RUnlock()
return uint32(len(e.compiledModules))
}
// DeleteCompiledModule implements wasm.Engine.
func (e *engine) DeleteCompiledModule(m *wasm.Module) {
e.mux.Lock()
defer e.mux.Unlock()
cm, ok := e.compiledModules[m.ID]
if ok {
if len(cm.executable) > 0 {
e.deleteCompiledModuleFromSortedList(cm)
}
delete(e.compiledModules, m.ID)
}
}
func (e *engine) addCompiledModuleToSortedList(cm *compiledModule) {
ptr := uintptr(unsafe.Pointer(&cm.executable[0]))
index := sort.Search(len(e.sortedCompiledModules), func(i int) bool {
return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) >= ptr
})
e.sortedCompiledModules = append(e.sortedCompiledModules, nil)
copy(e.sortedCompiledModules[index+1:], e.sortedCompiledModules[index:])
e.sortedCompiledModules[index] = cm
}
func (e *engine) deleteCompiledModuleFromSortedList(cm *compiledModule) {
ptr := uintptr(unsafe.Pointer(&cm.executable[0]))
index := sort.Search(len(e.sortedCompiledModules), func(i int) bool {
return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) >= ptr
})
if index >= len(e.sortedCompiledModules) {
return
}
copy(e.sortedCompiledModules[index:], e.sortedCompiledModules[index+1:])
e.sortedCompiledModules = e.sortedCompiledModules[:len(e.sortedCompiledModules)-1]
}
func (e *engine) compiledModuleOfAddr(addr uintptr) *compiledModule {
e.mux.RLock()
defer e.mux.RUnlock()
index := sort.Search(len(e.sortedCompiledModules), func(i int) bool {
return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) > addr
})
index -= 1
if index < 0 {
return nil
}
candidate := e.sortedCompiledModules[index]
if checkAddrInBytes(addr, candidate.executable) {
// If the module has already been deleted, the candidate found by the search could be wrong; this containment check guards against that.
return candidate
}
return nil
}
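// Lookup sketch with illustrative addresses: sort.Search finds the first module whose executable
// starts strictly after addr, so index-1 is the only candidate that can contain it. With modules
// starting at 0x1000 and 0x8000, addr 0x8100 yields index 2 and hence the candidate starting at
// 0x8000. The final checkAddrInBytes call guards against returning a stale neighbour when the
// module that owned addr has already been deleted from the sorted list.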
func checkAddrInBytes(addr uintptr, b []byte) bool {
return uintptr(unsafe.Pointer(&b[0])) <= addr && addr <= uintptr(unsafe.Pointer(&b[len(b)-1]))
}
// NewModuleEngine implements wasm.Engine.
func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm.ModuleEngine, error) {
me := &moduleEngine{}
// Note: imported functions are resolved in moduleEngine.ResolveImportedFunction.
me.importedFunctions = make([]importedFunction, m.ImportFunctionCount)
compiled, ok := e.getCompiledModuleFromMemory(m)
if !ok {
return nil, errors.New("source module must be compiled before instantiation")
}
me.parent = compiled
me.module = mi
me.listeners = compiled.listeners
if m.IsHostModule {
me.opaque = buildHostModuleOpaque(m, compiled.listeners)
me.opaquePtr = &me.opaque[0]
} else {
if size := compiled.offsets.TotalSize; size != 0 {
opaque := newAlignedOpaque(size)
me.opaque = opaque
me.opaquePtr = &opaque[0]
}
}
return me, nil
}
func (e *engine) compileSharedFunctions() {
e.sharedFunctions = &sharedFunctions{
listenerBeforeTrampolines: make(map[*wasm.FunctionType][]byte),
listenerAfterTrampolines: make(map[*wasm.FunctionType][]byte),
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeGrowMemory, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32},
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.memoryGrowExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.memoryGrowExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_grow_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeTableGrow, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* table index */, ssa.TypeI32 /* num */, ssa.TypeI64 /* ref */},
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.tableGrowExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.tableGrowExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "table_grow_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCheckModuleExitCode, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI32 /* exec context */},
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.checkModuleExitCode = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.checkModuleExitCode
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "check_module_exit_code_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeRefFunc, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* function index */},
Results: []ssa.Type{ssa.TypeI64}, // returns the function reference.
}, false)
e.sharedFunctions.refFuncExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.refFuncExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "ref_func_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileStackGrowCallSequence()
e.sharedFunctions.stackGrowExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.stackGrowExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "stack_grow_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait32, &ssa.Signature{
// exec context, timeout, expected, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
// Returns the status.
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.memoryWait32Executable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.memoryWait32Executable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait32_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait64, &ssa.Signature{
// exec context, timeout, expected, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64, ssa.TypeI64},
// Returns the status.
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.memoryWait64Executable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.memoryWait64Executable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait64_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryNotify, &ssa.Signature{
// exec context, count, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
// Returns the number notified.
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.memoryNotifyExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.memoryNotifyExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_notify_trampoline")
}
}
e.setFinalizer(e.sharedFunctions, sharedFunctionsFinalizer)
}
func sharedFunctionsFinalizer(sf *sharedFunctions) {
if err := platform.MunmapCodeSegment(sf.memoryGrowExecutable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.checkModuleExitCode); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.stackGrowExecutable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.tableGrowExecutable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.refFuncExecutable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.memoryWait32Executable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.memoryWait64Executable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.memoryNotifyExecutable); err != nil {
panic(err)
}
for _, f := range sf.listenerBeforeTrampolines {
if err := platform.MunmapCodeSegment(f); err != nil {
panic(err)
}
}
for _, f := range sf.listenerAfterTrampolines {
if err := platform.MunmapCodeSegment(f); err != nil {
panic(err)
}
}
sf.memoryGrowExecutable = nil
sf.checkModuleExitCode = nil
sf.stackGrowExecutable = nil
sf.tableGrowExecutable = nil
sf.refFuncExecutable = nil
sf.memoryWait32Executable = nil
sf.memoryWait64Executable = nil
sf.memoryNotifyExecutable = nil
sf.listenerBeforeTrampolines = nil
sf.listenerAfterTrampolines = nil
}
func executablesFinalizer(exec *executables) {
if len(exec.executable) > 0 {
if err := platform.MunmapCodeSegment(exec.executable); err != nil {
panic(err)
}
}
exec.executable = nil
for _, f := range exec.entryPreambles {
if err := platform.MunmapCodeSegment(f); err != nil {
panic(err)
}
}
exec.entryPreambles = nil
}
func mmapExecutable(src []byte) []byte {
executable, err := platform.MmapCodeSegment(len(src))
if err != nil {
panic(err)
}
copy(executable, src)
if runtime.GOARCH == "arm64" {
// On arm64, we cannot give all of rwx at the same time, so we change it to exec.
if err = platform.MprotectRX(executable); err != nil {
panic(err)
}
}
return executable
}
func (cm *compiledModule) functionIndexOf(addr uintptr) wasm.Index {
addr -= uintptr(unsafe.Pointer(&cm.executable[0]))
offset := cm.functionOffsets
index := sort.Search(len(offset), func(i int) bool {
return offset[i] > int(addr)
})
index--
if index < 0 {
panic("BUG")
}
return wasm.Index(index)
}
func (e *engine) getListenerTrampolineForType(functionType *wasm.FunctionType) (before, after *byte) {
e.mux.Lock()
defer e.mux.Unlock()
beforeBuf, ok := e.sharedFunctions.listenerBeforeTrampolines[functionType]
afterBuf := e.sharedFunctions.listenerAfterTrampolines[functionType]
if ok {
return &beforeBuf[0], &afterBuf[0]
}
beforeSig, afterSig := frontend.SignatureForListener(functionType)
e.be.Init()
buf := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerBefore, beforeSig, false)
beforeBuf = mmapExecutable(buf)
e.be.Init()
buf = e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerAfter, afterSig, false)
afterBuf = mmapExecutable(buf)
e.sharedFunctions.listenerBeforeTrampolines[functionType] = beforeBuf
e.sharedFunctions.listenerAfterTrampolines[functionType] = afterBuf
return &beforeBuf[0], &afterBuf[0]
}
func (cm *compiledModule) getSourceOffset(pc uintptr) uint64 {
offsets := cm.sourceMap.executableOffsets
if len(offsets) == 0 {
return 0
}
index := sort.Search(len(offsets), func(i int) bool {
return offsets[i] >= pc
})
index--
if index < 0 {
return 0
}
return cm.sourceMap.wasmBinaryOffsets[index]
}
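// Worked example with illustrative values: given absolute executableOffsets {0x10, 0x40, 0x90}
// and wasmBinaryOffsets {5, 20, 61}, a pc of 0x70 searches to index 2 (the first offset >= pc),
// steps back to index 1, and returns Wasm offset 20, i.e. the entry with the greatest executable
// offset not exceeding pc.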

View File

@ -0,0 +1,296 @@
package wazevo
import (
"bytes"
"context"
"crypto/sha256"
"encoding/binary"
"fmt"
"hash/crc32"
"io"
"runtime"
"unsafe"
"github.com/tetratelabs/wazero/experimental"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
"github.com/tetratelabs/wazero/internal/filecache"
"github.com/tetratelabs/wazero/internal/platform"
"github.com/tetratelabs/wazero/internal/u32"
"github.com/tetratelabs/wazero/internal/u64"
"github.com/tetratelabs/wazero/internal/wasm"
)
var crc = crc32.MakeTable(crc32.Castagnoli)
// fileCacheKey returns a key for the file cache.
// In order to avoid collisions with the existing compiler, we do not use m.ID directly,
// but instead rehash it together with the magic bytes.
func fileCacheKey(m *wasm.Module) (ret filecache.Key) {
s := sha256.New()
s.Write(m.ID[:])
s.Write(magic)
s.Sum(ret[:0])
return
}
func (e *engine) addCompiledModule(module *wasm.Module, cm *compiledModule) (err error) {
e.addCompiledModuleToMemory(module, cm)
if !module.IsHostModule && e.fileCache != nil {
err = e.addCompiledModuleToCache(module, cm)
}
return
}
func (e *engine) getCompiledModule(module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (cm *compiledModule, ok bool, err error) {
cm, ok = e.getCompiledModuleFromMemory(module)
if ok {
return
}
cm, ok, err = e.getCompiledModuleFromCache(module)
if ok {
cm.parent = e
cm.module = module
cm.sharedFunctions = e.sharedFunctions
cm.ensureTermination = ensureTermination
cm.offsets = wazevoapi.NewModuleContextOffsetData(module, len(listeners) > 0)
if len(listeners) > 0 {
cm.listeners = listeners
cm.listenerBeforeTrampolines = make([]*byte, len(module.TypeSection))
cm.listenerAfterTrampolines = make([]*byte, len(module.TypeSection))
for i := range module.TypeSection {
typ := &module.TypeSection[i]
before, after := e.getListenerTrampolineForType(typ)
cm.listenerBeforeTrampolines[i] = before
cm.listenerAfterTrampolines[i] = after
}
}
e.addCompiledModuleToMemory(module, cm)
ssaBuilder := ssa.NewBuilder()
machine := newMachine()
be := backend.NewCompiler(context.Background(), machine, ssaBuilder)
cm.executables.compileEntryPreambles(module, machine, be)
// Set the finalizer.
e.setFinalizer(cm.executables, executablesFinalizer)
}
return
}
func (e *engine) addCompiledModuleToMemory(m *wasm.Module, cm *compiledModule) {
e.mux.Lock()
defer e.mux.Unlock()
e.compiledModules[m.ID] = cm
if len(cm.executable) > 0 {
e.addCompiledModuleToSortedList(cm)
}
}
func (e *engine) getCompiledModuleFromMemory(module *wasm.Module) (cm *compiledModule, ok bool) {
e.mux.RLock()
defer e.mux.RUnlock()
cm, ok = e.compiledModules[module.ID]
return
}
func (e *engine) addCompiledModuleToCache(module *wasm.Module, cm *compiledModule) (err error) {
if e.fileCache == nil || module.IsHostModule {
return
}
err = e.fileCache.Add(fileCacheKey(module), serializeCompiledModule(e.wazeroVersion, cm))
return
}
func (e *engine) getCompiledModuleFromCache(module *wasm.Module) (cm *compiledModule, hit bool, err error) {
if e.fileCache == nil || module.IsHostModule {
return
}
// Check if the entries exist in the external cache.
var cached io.ReadCloser
cached, hit, err = e.fileCache.Get(fileCacheKey(module))
if !hit || err != nil {
return
}
// Otherwise, we hit the external cache.
// We deserialize the compiled module from `cached`.
var staleCache bool
// Note: cached.Close is ensured to be called in deserializeCompiledModule.
cm, staleCache, err = deserializeCompiledModule(e.wazeroVersion, cached)
if err != nil {
hit = false
return
} else if staleCache {
return nil, false, e.fileCache.Delete(fileCacheKey(module))
}
return
}
var magic = []byte{'W', 'A', 'Z', 'E', 'V', 'O'}
func serializeCompiledModule(wazeroVersion string, cm *compiledModule) io.Reader {
buf := bytes.NewBuffer(nil)
// First 6 byte: WAZEVO header.
buf.Write(magic)
// Next 1 byte: length of version:
buf.WriteByte(byte(len(wazeroVersion)))
// Version of wazero.
buf.WriteString(wazeroVersion)
// Number of *code (== locally defined functions in the module): 4 bytes.
buf.Write(u32.LeBytes(uint32(len(cm.functionOffsets))))
for _, offset := range cm.functionOffsets {
// The offset of this function in the executable (8 bytes).
buf.Write(u64.LeBytes(uint64(offset)))
}
// The length of code segment (8 bytes).
buf.Write(u64.LeBytes(uint64(len(cm.executable))))
// Append the native code.
buf.Write(cm.executable)
// Append checksum.
checksum := crc32.Checksum(cm.executable, crc)
buf.Write(u32.LeBytes(checksum))
if sm := cm.sourceMap; len(sm.executableOffsets) > 0 {
buf.WriteByte(1) // indicates that source map is present.
l := len(sm.wasmBinaryOffsets)
buf.Write(u64.LeBytes(uint64(l)))
executableAddr := uintptr(unsafe.Pointer(&cm.executable[0]))
for i := 0; i < l; i++ {
buf.Write(u64.LeBytes(sm.wasmBinaryOffsets[i]))
// executableOffsets holds absolute addresses, so we subtract executableAddr to store a relative offset.
buf.Write(u64.LeBytes(uint64(sm.executableOffsets[i] - executableAddr)))
}
} else {
buf.WriteByte(0) // indicates that source map is not present.
}
return bytes.NewReader(buf.Bytes())
}
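// Cache entry layout, as written above:
//
//	magic "WAZEVO" (6 bytes)
//	version length (1 byte), then the wazero version string
//	number of local functions (4 bytes, little-endian)
//	per function: its offset in the executable (8 bytes)
//	executable length (8 bytes), the machine code, then its CRC32 (Castagnoli) checksum (4 bytes)
//	source map presence flag (1 byte); if 1, the entry count (8 bytes) followed by
//	(wasm offset, executable-relative offset) pairs of 8 bytes each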
func deserializeCompiledModule(wazeroVersion string, reader io.ReadCloser) (cm *compiledModule, staleCache bool, err error) {
defer reader.Close()
cacheHeaderSize := len(magic) + 1 /* version size */ + len(wazeroVersion) + 4 /* number of functions */
// Read the header before the native code.
header := make([]byte, cacheHeaderSize)
n, err := reader.Read(header)
if err != nil {
return nil, false, fmt.Errorf("compilationcache: error reading header: %v", err)
}
if n != cacheHeaderSize {
return nil, false, fmt.Errorf("compilationcache: invalid header length: %d", n)
}
if !bytes.Equal(header[:len(magic)], magic) {
return nil, false, fmt.Errorf(
"compilationcache: invalid magic number: got %s but want %s", magic, header[:len(magic)])
}
// Check the version compatibility.
versionSize := int(header[len(magic)])
cachedVersionBegin, cachedVersionEnd := len(magic)+1, len(magic)+1+versionSize
if cachedVersionEnd >= len(header) {
staleCache = true
return
} else if cachedVersion := string(header[cachedVersionBegin:cachedVersionEnd]); cachedVersion != wazeroVersion {
staleCache = true
return
}
functionsNum := binary.LittleEndian.Uint32(header[len(header)-4:])
cm = &compiledModule{functionOffsets: make([]int, functionsNum), executables: &executables{}}
var eightBytes [8]byte
for i := uint32(0); i < functionsNum; i++ {
// Read the offset of each function in the executable.
var offset uint64
if offset, err = readUint64(reader, &eightBytes); err != nil {
err = fmt.Errorf("compilationcache: error reading func[%d] executable offset: %v", i, err)
return
}
cm.functionOffsets[i] = int(offset)
}
executableLen, err := readUint64(reader, &eightBytes)
if err != nil {
err = fmt.Errorf("compilationcache: error reading executable size: %v", err)
return
}
if executableLen > 0 {
executable, err := platform.MmapCodeSegment(int(executableLen))
if err != nil {
err = fmt.Errorf("compilationcache: error mmapping executable (len=%d): %v", executableLen, err)
return nil, false, err
}
_, err = io.ReadFull(reader, executable)
if err != nil {
err = fmt.Errorf("compilationcache: error reading executable (len=%d): %v", executableLen, err)
return nil, false, err
}
expected := crc32.Checksum(executable, crc)
if _, err = io.ReadFull(reader, eightBytes[:4]); err != nil {
return nil, false, fmt.Errorf("compilationcache: could not read checksum: %v", err)
} else if checksum := binary.LittleEndian.Uint32(eightBytes[:4]); expected != checksum {
return nil, false, fmt.Errorf("compilationcache: checksum mismatch (expected %d, got %d)", expected, checksum)
}
if runtime.GOARCH == "arm64" {
// On arm64, we cannot give all of rwx at the same time, so we change it to exec.
if err = platform.MprotectRX(executable); err != nil {
return nil, false, err
}
}
cm.executable = executable
}
if _, err := io.ReadFull(reader, eightBytes[:1]); err != nil {
return nil, false, fmt.Errorf("compilationcache: error reading source map presence: %v", err)
}
if eightBytes[0] == 1 {
sm := &cm.sourceMap
sourceMapLen, err := readUint64(reader, &eightBytes)
if err != nil {
err = fmt.Errorf("compilationcache: error reading source map length: %v", err)
return nil, false, err
}
executableOffset := uintptr(unsafe.Pointer(&cm.executable[0]))
for i := uint64(0); i < sourceMapLen; i++ {
wasmBinaryOffset, err := readUint64(reader, &eightBytes)
if err != nil {
err = fmt.Errorf("compilationcache: error reading source map[%d] wasm binary offset: %v", i, err)
return nil, false, err
}
executableRelativeOffset, err := readUint64(reader, &eightBytes)
if err != nil {
err = fmt.Errorf("compilationcache: error reading source map[%d] executable offset: %v", i, err)
return nil, false, err
}
sm.wasmBinaryOffsets = append(sm.wasmBinaryOffsets, wasmBinaryOffset)
// executableOffsets holds absolute addresses, so we add executableOffset back to the stored relative offset.
sm.executableOffsets = append(sm.executableOffsets, uintptr(executableRelativeOffset)+executableOffset)
}
}
return
}
// readUint64 strictly reads a uint64 in little-endian byte order, using the
// given array as a buffer. It returns io.EOF if fewer than 8 bytes were read.
func readUint64(reader io.Reader, b *[8]byte) (uint64, error) {
s := b[0:8]
n, err := reader.Read(s)
if err != nil {
return 0, err
} else if n < 8 { // more strict than reader.Read
return 0, io.EOF
}
// Read the u64 from the underlying buffer.
ret := binary.LittleEndian.Uint64(s)
return ret, nil
}

View File

@ -0,0 +1,15 @@
//go:build amd64 && !tinygo
package wazevo
import _ "unsafe"
// entrypoint is implemented by the backend.
//
//go:linkname entrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64.entrypoint
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr)
// entrypoint is implemented by the backend.
//
//go:linkname afterGoFunctionCallEntrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64.afterGoFunctionCallEntrypoint
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)

View File

@ -0,0 +1,15 @@
//go:build arm64 && !tinygo
package wazevo
import _ "unsafe"
// entrypoint is implemented by the backend.
//
//go:linkname entrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64.entrypoint
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr)
// entrypoint is implemented by the backend.
//
//go:linkname afterGoFunctionCallEntrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64.afterGoFunctionCallEntrypoint
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)

View File

@ -0,0 +1,15 @@
//go:build (!arm64 && !amd64) || tinygo
package wazevo
import (
"runtime"
)
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr) {
panic(runtime.GOARCH)
}
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) {
panic(runtime.GOARCH)
}

View File

@ -0,0 +1,594 @@
// Package frontend implements the translation of WebAssembly to SSA IR using the ssa package.
package frontend
import (
"bytes"
"math"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
"github.com/tetratelabs/wazero/internal/wasm"
)
// Compiler is in charge of lowering Wasm to SSA IR, and performs the optimization
// on top of it in an architecture-independent way.
type Compiler struct {
// Per-module data that is used across all functions.
m *wasm.Module
offset *wazevoapi.ModuleContextOffsetData
// ssaBuilder is a ssa.Builder used by this frontend.
ssaBuilder ssa.Builder
signatures map[*wasm.FunctionType]*ssa.Signature
listenerSignatures map[*wasm.FunctionType][2]*ssa.Signature
memoryGrowSig ssa.Signature
memoryWait32Sig ssa.Signature
memoryWait64Sig ssa.Signature
memoryNotifySig ssa.Signature
checkModuleExitCodeSig ssa.Signature
tableGrowSig ssa.Signature
refFuncSig ssa.Signature
memmoveSig ssa.Signature
ensureTermination bool
// The following fields are reset per function.
// wasmLocalToVariable maps the index (considered as wasm.Index of locals)
// to the corresponding ssa.Variable.
wasmLocalToVariable [] /* local index to */ ssa.Variable
wasmLocalFunctionIndex wasm.Index
wasmFunctionTypeIndex wasm.Index
wasmFunctionTyp *wasm.FunctionType
wasmFunctionLocalTypes []wasm.ValueType
wasmFunctionBody []byte
wasmFunctionBodyOffsetInCodeSection uint64
memoryBaseVariable, memoryLenVariable ssa.Variable
needMemory bool
memoryShared bool
globalVariables []ssa.Variable
globalVariablesTypes []ssa.Type
mutableGlobalVariablesIndexes []wasm.Index // index to ^.
needListener bool
needSourceOffsetInfo bool
// br is reused during lowering.
br *bytes.Reader
loweringState loweringState
knownSafeBounds [] /* ssa.ValueID to */ knownSafeBound
knownSafeBoundsSet []ssa.ValueID
knownSafeBoundsAtTheEndOfBlocks [] /* ssa.BlockID to */ knownSafeBoundsAtTheEndOfBlock
varLengthKnownSafeBoundWithIDPool wazevoapi.VarLengthPool[knownSafeBoundWithID]
execCtxPtrValue, moduleCtxPtrValue ssa.Value
// The following fields are reused for the known-safe-bounds analysis.
pointers []int
bounds [][]knownSafeBoundWithID
}
type (
// knownSafeBound represents a known safe bound for a value.
knownSafeBound struct {
// bound is a constant upper bound for the value.
bound uint64
// absoluteAddr is the absolute address of the value.
absoluteAddr ssa.Value
}
// knownSafeBoundWithID is a knownSafeBound with the ID of the value.
knownSafeBoundWithID struct {
knownSafeBound
id ssa.ValueID
}
knownSafeBoundsAtTheEndOfBlock = wazevoapi.VarLength[knownSafeBoundWithID]
)
var knownSafeBoundsAtTheEndOfBlockNil = wazevoapi.NewNilVarLength[knownSafeBoundWithID]()
// NewFrontendCompiler returns a frontend Compiler.
func NewFrontendCompiler(m *wasm.Module, ssaBuilder ssa.Builder, offset *wazevoapi.ModuleContextOffsetData, ensureTermination bool, listenerOn bool, sourceInfo bool) *Compiler {
c := &Compiler{
m: m,
ssaBuilder: ssaBuilder,
br: bytes.NewReader(nil),
offset: offset,
ensureTermination: ensureTermination,
needSourceOffsetInfo: sourceInfo,
varLengthKnownSafeBoundWithIDPool: wazevoapi.NewVarLengthPool[knownSafeBoundWithID](),
}
c.declareSignatures(listenerOn)
return c
}
func (c *Compiler) declareSignatures(listenerOn bool) {
m := c.m
c.signatures = make(map[*wasm.FunctionType]*ssa.Signature, len(m.TypeSection)+2)
if listenerOn {
c.listenerSignatures = make(map[*wasm.FunctionType][2]*ssa.Signature, len(m.TypeSection))
}
for i := range m.TypeSection {
wasmSig := &m.TypeSection[i]
sig := SignatureForWasmFunctionType(wasmSig)
sig.ID = ssa.SignatureID(i)
c.signatures[wasmSig] = &sig
c.ssaBuilder.DeclareSignature(&sig)
if listenerOn {
beforeSig, afterSig := SignatureForListener(wasmSig)
beforeSig.ID = ssa.SignatureID(i) + ssa.SignatureID(len(m.TypeSection))
afterSig.ID = ssa.SignatureID(i) + ssa.SignatureID(len(m.TypeSection))*2
c.listenerSignatures[wasmSig] = [2]*ssa.Signature{beforeSig, afterSig}
c.ssaBuilder.DeclareSignature(beforeSig)
c.ssaBuilder.DeclareSignature(afterSig)
}
}
begin := ssa.SignatureID(len(m.TypeSection))
if listenerOn {
begin *= 3
}
c.memoryGrowSig = ssa.Signature{
ID: begin,
// Takes execution context and the page size to grow.
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32},
// Returns the previous page size.
Results: []ssa.Type{ssa.TypeI32},
}
c.ssaBuilder.DeclareSignature(&c.memoryGrowSig)
c.checkModuleExitCodeSig = ssa.Signature{
ID: c.memoryGrowSig.ID + 1,
// Only takes execution context.
Params: []ssa.Type{ssa.TypeI64},
}
c.ssaBuilder.DeclareSignature(&c.checkModuleExitCodeSig)
c.tableGrowSig = ssa.Signature{
ID: c.checkModuleExitCodeSig.ID + 1,
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* table index */, ssa.TypeI32 /* num */, ssa.TypeI64 /* ref */},
// Returns the previous size.
Results: []ssa.Type{ssa.TypeI32},
}
c.ssaBuilder.DeclareSignature(&c.tableGrowSig)
c.refFuncSig = ssa.Signature{
ID: c.tableGrowSig.ID + 1,
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* func index */},
// Returns the function reference.
Results: []ssa.Type{ssa.TypeI64},
}
c.ssaBuilder.DeclareSignature(&c.refFuncSig)
c.memmoveSig = ssa.Signature{
ID: c.refFuncSig.ID + 1,
// dst, src, and the byte count.
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64},
}
c.ssaBuilder.DeclareSignature(&c.memmoveSig)
c.memoryWait32Sig = ssa.Signature{
ID: c.memmoveSig.ID + 1,
// exec context, timeout, expected, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
// Returns the status.
Results: []ssa.Type{ssa.TypeI32},
}
c.ssaBuilder.DeclareSignature(&c.memoryWait32Sig)
c.memoryWait64Sig = ssa.Signature{
ID: c.memoryWait32Sig.ID + 1,
// exec context, timeout, expected, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64, ssa.TypeI64},
// Returns the status.
Results: []ssa.Type{ssa.TypeI32},
}
c.ssaBuilder.DeclareSignature(&c.memoryWait64Sig)
c.memoryNotifySig = ssa.Signature{
ID: c.memoryWait64Sig.ID + 1,
// exec context, count, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
// Returns the number notified.
Results: []ssa.Type{ssa.TypeI32},
}
c.ssaBuilder.DeclareSignature(&c.memoryNotifySig)
}
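// Numbering sketch with illustrative counts: with two entries in the type section and listeners
// enabled, the Wasm signatures take IDs 0-1, the before-listener signatures 2-3, and the
// after-listener signatures 4-5, so begin is 6 and the builtin signatures declared above
// (memory grow, check module exit code, table grow, ref.func, memmove, memory wait32/wait64,
// memory notify) take IDs 6 through 13.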
// SignatureForWasmFunctionType returns the ssa.Signature for the given wasm.FunctionType.
func SignatureForWasmFunctionType(typ *wasm.FunctionType) ssa.Signature {
sig := ssa.Signature{
// +2 to pass executionContextPtr and moduleContextPtr. See the inline comment in LowerToSSA.
Params: make([]ssa.Type, len(typ.Params)+2),
Results: make([]ssa.Type, len(typ.Results)),
}
sig.Params[0] = executionContextPtrTyp
sig.Params[1] = moduleContextPtrTyp
for j, typ := range typ.Params {
sig.Params[j+2] = WasmTypeToSSAType(typ)
}
for j, typ := range typ.Results {
sig.Results[j] = WasmTypeToSSAType(typ)
}
return sig
}
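// Example (editorial sketch, not part of the original file): for a Wasm function of type
// (i32, f64) -> i64, SignatureForWasmFunctionType prepends the execution-context and
// module-context pointers, yielding Params = [i64, i64, i32, f64] and Results = [i64].
// The helper below only uses identifiers already defined in this package, is never called
// by the compiler, and exists purely for illustration.
func exampleSignatureForWasmFunctionType() ssa.Signature {
wasmSig := &wasm.FunctionType{
Params: []wasm.ValueType{wasm.ValueTypeI32, wasm.ValueTypeF64},
Results: []wasm.ValueType{wasm.ValueTypeI64},
}
// Returned Signature: Params = [i64 i64 i32 f64], Results = [i64].
return SignatureForWasmFunctionType(wasmSig)
}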
// Init initializes the state of the frontend Compiler and makes it ready for the next function.
func (c *Compiler) Init(idx, typIndex wasm.Index, typ *wasm.FunctionType, localTypes []wasm.ValueType, body []byte, needListener bool, bodyOffsetInCodeSection uint64) {
c.ssaBuilder.Init(c.signatures[typ])
c.loweringState.reset()
c.wasmFunctionTypeIndex = typIndex
c.wasmLocalFunctionIndex = idx
c.wasmFunctionTyp = typ
c.wasmFunctionLocalTypes = localTypes
c.wasmFunctionBody = body
c.wasmFunctionBodyOffsetInCodeSection = bodyOffsetInCodeSection
c.needListener = needListener
c.clearSafeBounds()
c.varLengthKnownSafeBoundWithIDPool.Reset()
c.knownSafeBoundsAtTheEndOfBlocks = c.knownSafeBoundsAtTheEndOfBlocks[:0]
}
// Note: this assumes a 64-bit platform (I believe we won't have a 32-bit backend ;)).
const executionContextPtrTyp, moduleContextPtrTyp = ssa.TypeI64, ssa.TypeI64
// LowerToSSA lowers the current function to SSA function which will be held by ssaBuilder.
// After calling this, the caller will be able to access the SSA info in *Compiler.ssaBuilder.
//
// Note that this only does the naive lowering and does not perform any optimization; instead, the caller is expected to do so.
func (c *Compiler) LowerToSSA() {
builder := c.ssaBuilder
// Set up the entry block.
entryBlock := builder.AllocateBasicBlock()
builder.SetCurrentBlock(entryBlock)
// Functions always take two parameters in addition to Wasm-level parameters:
//
// 1. executionContextPtr: pointer to the *executionContext in wazevo package.
// This will be used to exit the execution in the face of trap, plus used for host function calls.
//
// 2. moduleContextPtr: pointer to the *moduleContextOpaque in wazevo package.
// This will be used to access memory, etc. Also, this will be used during host function calls.
//
// Note: it's clear that sometimes a function won't need them. For example,
// if the function doesn't trap and doesn't make any function calls, then
// we might be able to eliminate the parameters. However, if that function
// can be called via call_indirect, then we cannot eliminate them because the
// signature wouldn't match the expected one.
// TODO: maybe there's some way to do this optimization without glitches, but so far I have no clue about the feasibility.
//
// Note: in Wasmtime and many other runtimes, moduleContextPtr is called "vmContext". Also note that `moduleContextPtr`
// is wazero-specific, since other runtimes can naturally use OS-level signals to do this job thanks to the fact that
// they can use the native stack, whereas wazero cannot use the goroutine stack and has to use a Go-runtime-allocated []byte as a stack.
c.execCtxPtrValue = entryBlock.AddParam(builder, executionContextPtrTyp)
c.moduleCtxPtrValue = entryBlock.AddParam(builder, moduleContextPtrTyp)
builder.AnnotateValue(c.execCtxPtrValue, "exec_ctx")
builder.AnnotateValue(c.moduleCtxPtrValue, "module_ctx")
for i, typ := range c.wasmFunctionTyp.Params {
st := WasmTypeToSSAType(typ)
variable := builder.DeclareVariable(st)
value := entryBlock.AddParam(builder, st)
builder.DefineVariable(variable, value, entryBlock)
c.setWasmLocalVariable(wasm.Index(i), variable)
}
c.declareWasmLocals(entryBlock)
c.declareNecessaryVariables()
c.lowerBody(entryBlock)
}
// localVariable returns the SSA variable for the given Wasm local index.
func (c *Compiler) localVariable(index wasm.Index) ssa.Variable {
return c.wasmLocalToVariable[index]
}
func (c *Compiler) setWasmLocalVariable(index wasm.Index, variable ssa.Variable) {
idx := int(index)
if idx >= len(c.wasmLocalToVariable) {
c.wasmLocalToVariable = append(c.wasmLocalToVariable, make([]ssa.Variable, idx+1-len(c.wasmLocalToVariable))...)
}
c.wasmLocalToVariable[idx] = variable
}
// declareWasmLocals declares the SSA variables for the Wasm locals.
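// Non-parameter locals are zero-initialized in Wasm, so each declared variable is defined
// with the corresponding zero constant in the entry block.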
func (c *Compiler) declareWasmLocals(entry ssa.BasicBlock) {
localCount := wasm.Index(len(c.wasmFunctionTyp.Params))
for i, typ := range c.wasmFunctionLocalTypes {
st := WasmTypeToSSAType(typ)
variable := c.ssaBuilder.DeclareVariable(st)
c.setWasmLocalVariable(wasm.Index(i)+localCount, variable)
zeroInst := c.ssaBuilder.AllocateInstruction()
switch st {
case ssa.TypeI32:
zeroInst.AsIconst32(0)
case ssa.TypeI64:
zeroInst.AsIconst64(0)
case ssa.TypeF32:
zeroInst.AsF32const(0)
case ssa.TypeF64:
zeroInst.AsF64const(0)
case ssa.TypeV128:
zeroInst.AsVconst(0, 0)
default:
panic("TODO: " + wasm.ValueTypeName(typ))
}
c.ssaBuilder.InsertInstruction(zeroInst)
value := zeroInst.Return()
c.ssaBuilder.DefineVariable(variable, value, entry)
}
}
func (c *Compiler) declareNecessaryVariables() {
if c.needMemory = c.m.MemorySection != nil; c.needMemory {
c.memoryShared = c.m.MemorySection.IsShared
} else if c.needMemory = c.m.ImportMemoryCount > 0; c.needMemory {
for _, imp := range c.m.ImportSection {
if imp.Type == wasm.ExternTypeMemory {
c.memoryShared = imp.DescMem.IsShared
break
}
}
}
if c.needMemory {
c.memoryBaseVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64)
c.memoryLenVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64)
}
c.globalVariables = c.globalVariables[:0]
c.mutableGlobalVariablesIndexes = c.mutableGlobalVariablesIndexes[:0]
c.globalVariablesTypes = c.globalVariablesTypes[:0]
for _, imp := range c.m.ImportSection {
if imp.Type == wasm.ExternTypeGlobal {
desc := imp.DescGlobal
c.declareWasmGlobal(desc.ValType, desc.Mutable)
}
}
for _, g := range c.m.GlobalSection {
desc := g.Type
c.declareWasmGlobal(desc.ValType, desc.Mutable)
}
// TODO: add tables.
}
func (c *Compiler) declareWasmGlobal(typ wasm.ValueType, mutable bool) {
var st ssa.Type
switch typ {
case wasm.ValueTypeI32:
st = ssa.TypeI32
case wasm.ValueTypeI64,
// Both externref and funcref are represented as I64 since we only support 64-bit platforms.
wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
st = ssa.TypeI64
case wasm.ValueTypeF32:
st = ssa.TypeF32
case wasm.ValueTypeF64:
st = ssa.TypeF64
case wasm.ValueTypeV128:
st = ssa.TypeV128
default:
panic("TODO: " + wasm.ValueTypeName(typ))
}
v := c.ssaBuilder.DeclareVariable(st)
index := wasm.Index(len(c.globalVariables))
c.globalVariables = append(c.globalVariables, v)
c.globalVariablesTypes = append(c.globalVariablesTypes, st)
if mutable {
c.mutableGlobalVariablesIndexes = append(c.mutableGlobalVariablesIndexes, index)
}
}
// WasmTypeToSSAType converts wasm.ValueType to ssa.Type.
func WasmTypeToSSAType(vt wasm.ValueType) ssa.Type {
switch vt {
case wasm.ValueTypeI32:
return ssa.TypeI32
case wasm.ValueTypeI64,
// Both externref and funcref are represented as I64 since we only support 64-bit platforms.
wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
return ssa.TypeI64
case wasm.ValueTypeF32:
return ssa.TypeF32
case wasm.ValueTypeF64:
return ssa.TypeF64
case wasm.ValueTypeV128:
return ssa.TypeV128
default:
panic("TODO: " + wasm.ValueTypeName(vt))
}
}
// addBlockParamsFromWasmTypes adds the block parameters to the given block.
func (c *Compiler) addBlockParamsFromWasmTypes(tps []wasm.ValueType, blk ssa.BasicBlock) {
for _, typ := range tps {
st := WasmTypeToSSAType(typ)
blk.AddParam(c.ssaBuilder, st)
}
}
// formatBuilder outputs the constructed SSA function as a string with source information.
func (c *Compiler) formatBuilder() string {
return c.ssaBuilder.Format()
}
// SignatureForListener returns the signatures for the listener functions.
func SignatureForListener(wasmSig *wasm.FunctionType) (*ssa.Signature, *ssa.Signature) {
beforeSig := &ssa.Signature{}
beforeSig.Params = make([]ssa.Type, len(wasmSig.Params)+2)
beforeSig.Params[0] = ssa.TypeI64 // Execution context.
beforeSig.Params[1] = ssa.TypeI32 // Function index.
for i, p := range wasmSig.Params {
beforeSig.Params[i+2] = WasmTypeToSSAType(p)
}
afterSig := &ssa.Signature{}
afterSig.Params = make([]ssa.Type, len(wasmSig.Results)+2)
afterSig.Params[0] = ssa.TypeI64 // Execution context.
afterSig.Params[1] = ssa.TypeI32 // Function index.
for i, p := range wasmSig.Results {
afterSig.Params[i+2] = WasmTypeToSSAType(p)
}
return beforeSig, afterSig
}
// getKnownSafeBound returns the recorded safe bound for the given value, or nil if none has been recorded.
func (c *Compiler) getKnownSafeBound(v ssa.ValueID) *knownSafeBound {
if int(v) >= len(c.knownSafeBounds) {
return nil
}
return &c.knownSafeBounds[v]
}
// recordKnownSafeBound records the given safe bound for the given value.
func (c *Compiler) recordKnownSafeBound(v ssa.ValueID, safeBound uint64, absoluteAddr ssa.Value) {
if int(v) >= len(c.knownSafeBounds) {
c.knownSafeBounds = append(c.knownSafeBounds, make([]knownSafeBound, v+1)...)
}
if existing := c.knownSafeBounds[v]; existing.bound == 0 {
c.knownSafeBounds[v] = knownSafeBound{
bound: safeBound,
absoluteAddr: absoluteAddr,
}
c.knownSafeBoundsSet = append(c.knownSafeBoundsSet, v)
} else if safeBound > existing.bound {
c.knownSafeBounds[v].bound = safeBound
}
}
// clearSafeBounds clears the known safe bounds.
func (c *Compiler) clearSafeBounds() {
for _, v := range c.knownSafeBoundsSet {
ptr := &c.knownSafeBounds[v]
ptr.bound = 0
ptr.absoluteAddr = ssa.ValueInvalid
}
c.knownSafeBoundsSet = c.knownSafeBoundsSet[:0]
}
// resetAbsoluteAddressInSafeBounds resets the absolute addresses recorded in the known safe bounds.
func (c *Compiler) resetAbsoluteAddressInSafeBounds() {
for _, v := range c.knownSafeBoundsSet {
ptr := &c.knownSafeBounds[v]
ptr.absoluteAddr = ssa.ValueInvalid
}
}
func (k *knownSafeBound) valid() bool {
return k != nil && k.bound > 0
}
func (c *Compiler) allocateVarLengthValues(_cap int, vs ...ssa.Value) ssa.Values {
builder := c.ssaBuilder
pool := builder.VarLengthPool()
args := pool.Allocate(_cap)
args = args.Append(builder.VarLengthPool(), vs...)
return args
}
func (c *Compiler) finalizeKnownSafeBoundsAtTheEndOfBlock(bID ssa.BasicBlockID) {
_bID := int(bID)
if l := len(c.knownSafeBoundsAtTheEndOfBlocks); _bID >= l {
c.knownSafeBoundsAtTheEndOfBlocks = append(c.knownSafeBoundsAtTheEndOfBlocks,
make([]knownSafeBoundsAtTheEndOfBlock, _bID+1-len(c.knownSafeBoundsAtTheEndOfBlocks))...)
for i := l; i < len(c.knownSafeBoundsAtTheEndOfBlocks); i++ {
c.knownSafeBoundsAtTheEndOfBlocks[i] = knownSafeBoundsAtTheEndOfBlockNil
}
}
p := &c.varLengthKnownSafeBoundWithIDPool
size := len(c.knownSafeBoundsSet)
allocated := c.varLengthKnownSafeBoundWithIDPool.Allocate(size)
// Sort the known safe bounds by the value ID so that we can use the intersection algorithm in initializeCurrentBlockKnownBounds.
sortSSAValueIDs(c.knownSafeBoundsSet)
for _, vID := range c.knownSafeBoundsSet {
kb := c.knownSafeBounds[vID]
allocated = allocated.Append(p, knownSafeBoundWithID{
knownSafeBound: kb,
id: vID,
})
}
c.knownSafeBoundsAtTheEndOfBlocks[bID] = allocated
c.clearSafeBounds()
}
func (c *Compiler) initializeCurrentBlockKnownBounds() {
currentBlk := c.ssaBuilder.CurrentBlock()
switch preds := currentBlk.Preds(); preds {
case 0:
case 1:
pred := currentBlk.Pred(0).ID()
for _, kb := range c.getKnownSafeBoundsAtTheEndOfBlocks(pred).View() {
// Unless the block is sealed, we cannot assume the absolute address is valid:
// later we might add another predecessor that has no visibility of that value.
addr := ssa.ValueInvalid
if currentBlk.Sealed() {
addr = kb.absoluteAddr
}
c.recordKnownSafeBound(kb.id, kb.bound, addr)
}
default:
c.pointers = c.pointers[:0]
c.bounds = c.bounds[:0]
for i := 0; i < preds; i++ {
c.bounds = append(c.bounds, c.getKnownSafeBoundsAtTheEndOfBlocks(currentBlk.Pred(i).ID()).View())
c.pointers = append(c.pointers, 0)
}
// If there are multiple predecessors, we need to find the intersection of the known safe bounds.
outer:
for {
smallestID := ssa.ValueID(math.MaxUint32)
for i, ptr := range c.pointers {
if ptr >= len(c.bounds[i]) {
break outer
}
cb := &c.bounds[i][ptr]
if id := cb.id; id < smallestID {
smallestID = cb.id
}
}
// Check if current elements are the same across all lists.
same := true
minBound := uint64(math.MaxUint64)
for i := 0; i < preds; i++ {
cb := &c.bounds[i][c.pointers[i]]
if cb.id != smallestID {
same = false
break
} else {
if cb.bound < minBound {
minBound = cb.bound
}
}
}
if same { // All elements are the same.
// Absolute address cannot be used in the intersection since the value might be only defined in one of the predecessors.
c.recordKnownSafeBound(smallestID, minBound, ssa.ValueInvalid)
}
// Move pointer(s) for the smallest ID forward (if same, move all).
for i := 0; i < preds; i++ {
cb := &c.bounds[i][c.pointers[i]]
if cb.id == smallestID {
c.pointers[i]++
}
}
}
}
}
func (c *Compiler) getKnownSafeBoundsAtTheEndOfBlocks(id ssa.BasicBlockID) knownSafeBoundsAtTheEndOfBlock {
if int(id) >= len(c.knownSafeBoundsAtTheEndOfBlocks) {
return knownSafeBoundsAtTheEndOfBlockNil
}
return c.knownSafeBoundsAtTheEndOfBlocks[id]
}
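// Editorial sketch (not part of the original file): initializeCurrentBlockKnownBounds above
// intersects the per-predecessor bound lists, which finalizeKnownSafeBoundsAtTheEndOfBlock
// keeps sorted by ValueID, by sweeping one cursor per list. The standalone helper below shows
// the same sweep on plain sorted []int slices, keeping an ID only if it appears in every list;
// it is illustrative only and unused by the compiler.
func intersectSortedIDsSketch(lists [][]int) (common []int) {
if len(lists) == 0 {
return nil
}
cursors := make([]int, len(lists))
for {
// Find the smallest ID among the current heads; stop once any list is exhausted.
smallest := int(^uint(0) >> 1)
for i, c := range cursors {
if c >= len(lists[i]) {
return common
}
if lists[i][c] < smallest {
smallest = lists[i][c]
}
}
// The ID is common only if every cursor currently points at it.
same := true
for i, c := range cursors {
if lists[i][c] != smallest {
same = false
}
}
if same {
common = append(common, smallest)
}
// Advance every cursor that points at the smallest ID.
for i, c := range cursors {
if lists[i][c] == smallest {
cursors[i]++
}
}
}
}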

File diff suppressed because it is too large


@ -0,0 +1,10 @@
package frontend
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/wasm"
)
func FunctionIndexToFuncRef(idx wasm.Index) ssa.FuncRef {
return ssa.FuncRef(idx)
}


@ -0,0 +1,15 @@
//go:build go1.21
package frontend
import (
"slices"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
func sortSSAValueIDs(IDs []ssa.ValueID) {
slices.SortFunc(IDs, func(i, j ssa.ValueID) int {
return int(i) - int(j)
})
}


@ -0,0 +1,17 @@
//go:build !go1.21
// TODO: delete after the floor Go version is 1.21
package frontend
import (
"sort"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
func sortSSAValueIDs(IDs []ssa.ValueID) {
sort.SliceStable(IDs, func(i, j int) bool {
return int(IDs[i]) < int(IDs[j])
})
}


@ -0,0 +1,82 @@
package wazevo
import (
"encoding/binary"
"reflect"
"unsafe"
"github.com/tetratelabs/wazero/experimental"
"github.com/tetratelabs/wazero/internal/wasm"
)
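// buildHostModuleOpaque builds the opaque module context for a host module. Layout, as
// written by the code below: bytes [0:8) hold the *wasm.Module pointer, bytes [8:32) hold
// the data/len/cap words of the listeners slice (written only when listeners are present),
// and from offset 32 each code-section entry stores the two interface words of its GoFunc
// (16 bytes per function).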
func buildHostModuleOpaque(m *wasm.Module, listeners []experimental.FunctionListener) moduleContextOpaque {
size := len(m.CodeSection)*16 + 32
ret := newAlignedOpaque(size)
binary.LittleEndian.PutUint64(ret[0:], uint64(uintptr(unsafe.Pointer(m))))
if len(listeners) > 0 {
sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&listeners))
binary.LittleEndian.PutUint64(ret[8:], uint64(sliceHeader.Data))
binary.LittleEndian.PutUint64(ret[16:], uint64(sliceHeader.Len))
binary.LittleEndian.PutUint64(ret[24:], uint64(sliceHeader.Cap))
}
offset := 32
for i := range m.CodeSection {
goFn := m.CodeSection[i].GoFunc
writeIface(goFn, ret[offset:])
offset += 16
}
return ret
}
func hostModuleFromOpaque(opaqueBegin uintptr) *wasm.Module {
var opaqueViewOverSlice []byte
sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverSlice))
sh.Data = opaqueBegin
sh.Len = 32
sh.Cap = 32
return *(**wasm.Module)(unsafe.Pointer(&opaqueViewOverSlice[0]))
}
func hostModuleListenersSliceFromOpaque(opaqueBegin uintptr) []experimental.FunctionListener {
var opaqueViewOverSlice []byte
sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverSlice))
sh.Data = opaqueBegin
sh.Len = 32
sh.Cap = 32
b := binary.LittleEndian.Uint64(opaqueViewOverSlice[8:])
l := binary.LittleEndian.Uint64(opaqueViewOverSlice[16:])
c := binary.LittleEndian.Uint64(opaqueViewOverSlice[24:])
var ret []experimental.FunctionListener
sh = (*reflect.SliceHeader)(unsafe.Pointer(&ret))
sh.Data = uintptr(b)
setSliceLimits(sh, uintptr(l), uintptr(c))
return ret
}
func hostModuleGoFuncFromOpaque[T any](index int, opaqueBegin uintptr) T {
offset := uintptr(index*16) + 32
ptr := opaqueBegin + offset
var opaqueViewOverFunction []byte
sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverFunction))
sh.Data = ptr
sh.Len = 16
sh.Cap = 16
return readIface(opaqueViewOverFunction).(T)
}
func writeIface(goFn interface{}, buf []byte) {
goFnIface := *(*[2]uint64)(unsafe.Pointer(&goFn))
binary.LittleEndian.PutUint64(buf, goFnIface[0])
binary.LittleEndian.PutUint64(buf[8:], goFnIface[1])
}
func readIface(buf []byte) interface{} {
b := binary.LittleEndian.Uint64(buf)
s := binary.LittleEndian.Uint64(buf[8:])
return *(*interface{})(unsafe.Pointer(&[2]uint64{b, s}))
}
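// Example (editorial sketch, not part of the original file): writeIface/readIface above copy
// the two machine words of an interface value into a little-endian buffer and rebuild the
// interface later, relying on the two-word interface representation of the gc toolchain just
// like the functions themselves do. The helper below round-trips a func value and exists
// purely for illustration.
func exampleIfaceRoundTrip() int {
var buf [16]byte
fn := func(x int) int { return x + 1 }
writeIface(fn, buf[:])
got := readIface(buf[:]).(func(int) int)
return got(41) // 42
}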


@ -0,0 +1,30 @@
//go:build amd64
package wazevo
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64"
)
func newMachine() backend.Machine {
return amd64.NewBackend()
}
// unwindStack is a function to unwind the stack, and appends return addresses to `returnAddresses` slice.
// The implementation must be aligned with the ABI/Calling convention.
func unwindStack(sp, fp, top uintptr, returnAddresses []uintptr) []uintptr {
return amd64.UnwindStack(sp, fp, top, returnAddresses)
}
// goCallStackView is a function to get a view of the stack before a Go call, which
// is the view of the stack allocated in CompileGoFunctionTrampoline.
func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
return amd64.GoCallStackView(stackPointerBeforeGoCall)
}
// adjustClonedStack is a function to adjust the stack after it is grown.
// More precisely, absolute addresses (frame pointers) in the stack must be adjusted.
func adjustClonedStack(oldsp, oldTop, sp, fp, top uintptr) {
amd64.AdjustClonedStack(oldsp, oldTop, sp, fp, top)
}


@ -0,0 +1,32 @@
//go:build arm64
package wazevo
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64"
)
func newMachine() backend.Machine {
return arm64.NewBackend()
}
// unwindStack is a function to unwind the stack, and appends return addresses to `returnAddresses` slice.
// The implementation must be aligned with the ABI/Calling convention.
func unwindStack(sp, fp, top uintptr, returnAddresses []uintptr) []uintptr {
return arm64.UnwindStack(sp, fp, top, returnAddresses)
}
// goCallStackView is a function to get a view of the stack before a Go call, which
// is the view of the stack allocated in CompileGoFunctionTrampoline.
func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
return arm64.GoCallStackView(stackPointerBeforeGoCall)
}
// adjustClonedStack is a function to adjust the stack after it is grown.
// More precisely, absolute addresses (frame pointers) in the stack must be adjusted.
func adjustClonedStack(oldsp, oldTop, sp, fp, top uintptr) {
// TODO: currently, the frame pointers are not used, and the saved old SPs are relative to the current stack pointer,
// so no adjustment is needed on arm64. However, if we make them absolute, which in my opinion is better performance-wise
// at the expense of slightly costlier stack growth, we will need to adjust the pushed frame pointers.
}


@ -0,0 +1,29 @@
//go:build !(amd64 || arm64)
package wazevo
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
)
func newMachine() backend.Machine {
panic("unsupported architecture")
}
// unwindStack is a function to unwind the stack, and appends return addresses to `returnAddresses` slice.
// The implementation must be aligned with the ABI/Calling convention.
func unwindStack(sp, fp, top uintptr, returnAddresses []uintptr) []uintptr {
panic("unsupported architecture")
}
// goCallStackView is a function to get a view of the stack before a Go call, which
// is the view of the stack allocated in CompileGoFunctionTrampoline.
func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
panic("unsupported architecture")
}
// adjustClonedStack is a function to adjust the stack after it is grown.
// More precisely, absolute addresses (frame pointers) in the stack must be adjusted.
func adjustClonedStack(oldsp, oldTop, sp, fp, top uintptr) {
panic("unsupported architecture")
}


@ -0,0 +1,11 @@
package wazevo
import (
"reflect"
"unsafe"
)
//go:linkname memmove runtime.memmove
func memmove(_, _ unsafe.Pointer, _ uintptr)
var memmovPtr = reflect.ValueOf(memmove).Pointer()


@ -0,0 +1,344 @@
package wazevo
import (
"encoding/binary"
"unsafe"
"github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/experimental"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
"github.com/tetratelabs/wazero/internal/wasm"
"github.com/tetratelabs/wazero/internal/wasmruntime"
)
type (
// moduleEngine implements wasm.ModuleEngine.
moduleEngine struct {
// opaquePtr equals &opaque[0].
opaquePtr *byte
parent *compiledModule
module *wasm.ModuleInstance
opaque moduleContextOpaque
localFunctionInstances []*functionInstance
importedFunctions []importedFunction
listeners []experimental.FunctionListener
}
functionInstance struct {
executable *byte
moduleContextOpaquePtr *byte
typeID wasm.FunctionTypeID
indexInModule wasm.Index
}
importedFunction struct {
me *moduleEngine
indexInModule wasm.Index
}
// moduleContextOpaque is the opaque byte slice of module-instance-specific contents whose size
// is only known at Wasm compile time, hence dynamic. Its contents are basically pointers to the module instance's
// objects as well as functions. This is sometimes called "VMContext" in other Wasm runtimes.
//
// Internally, the buffer is structured as follows:
//
// type moduleContextOpaque struct {
// moduleInstance *wasm.ModuleInstance
// localMemoryBufferPtr *byte (optional)
// localMemoryLength uint64 (optional)
// importedMemoryInstance *wasm.MemoryInstance (optional)
// importedMemoryOwnerOpaqueCtx *byte (optional)
// importedFunctions [# of importedFunctions]functionInstance
// importedGlobals []ImportedGlobal (optional)
// localGlobals []Global (optional)
// typeIDsBegin &wasm.ModuleInstance.TypeIDs[0] (optional)
// tables []*wasm.TableInstance (optional)
// beforeListenerTrampolines1stElement **byte (optional)
// afterListenerTrampolines1stElement **byte (optional)
// dataInstances1stElement []wasm.DataInstance (optional)
// elementInstances1stElement []wasm.ElementInstance (optional)
// }
//
// type ImportedGlobal struct {
// *Global
// _ uint64 // padding
// }
//
// type Global struct {
// Val, ValHi uint64
// }
//
// See wazevoapi.NewModuleContextOffsetData for the details of the offsets.
//
// Note that for host modules, the structure is entirely different. See buildHostModuleOpaque.
moduleContextOpaque []byte
)
func newAlignedOpaque(size int) moduleContextOpaque {
// Check if the size is a multiple of 16.
if size%16 != 0 {
panic("size must be a multiple of 16")
}
buf := make([]byte, size+16)
// Align the buffer to 16 bytes.
rem := uintptr(unsafe.Pointer(&buf[0])) % 16
buf = buf[16-rem:]
return buf
}
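// Editorial sketch (not part of the original file): newAlignedOpaque over-allocates by 16
// bytes and slices off the remainder so that the first element is always 16-byte aligned.
// The helper below merely demonstrates that property and is unused by the engine.
func exampleAlignedOpaque() bool {
op := newAlignedOpaque(64)
return uintptr(unsafe.Pointer(&op[0]))%16 == 0 // Always true by construction.
}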
func putLocalMemory(opaque []byte, offset wazevoapi.Offset, mem *wasm.MemoryInstance) {
s := uint64(len(mem.Buffer))
var b uint64
if len(mem.Buffer) > 0 {
b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0])))
}
binary.LittleEndian.PutUint64(opaque[offset:], b)
binary.LittleEndian.PutUint64(opaque[offset+8:], s)
}
func (m *moduleEngine) setupOpaque() {
inst := m.module
offsets := &m.parent.offsets
opaque := m.opaque
binary.LittleEndian.PutUint64(opaque[offsets.ModuleInstanceOffset:],
uint64(uintptr(unsafe.Pointer(m.module))),
)
if lm := offsets.LocalMemoryBegin; lm >= 0 {
putLocalMemory(opaque, lm, inst.MemoryInstance)
}
// Note: imported memory is resolved in ResolveImportedMemory.
// Note: imported functions are resolved in ResolveImportedFunction.
if globalOffset := offsets.GlobalsBegin; globalOffset >= 0 {
for i, g := range inst.Globals {
if i < int(inst.Source.ImportGlobalCount) {
importedME := g.Me.(*moduleEngine)
offset := importedME.parent.offsets.GlobalInstanceOffset(g.Index)
importedMEOpaque := importedME.opaque
binary.LittleEndian.PutUint64(opaque[globalOffset:],
uint64(uintptr(unsafe.Pointer(&importedMEOpaque[offset]))))
} else {
binary.LittleEndian.PutUint64(opaque[globalOffset:], g.Val)
binary.LittleEndian.PutUint64(opaque[globalOffset+8:], g.ValHi)
}
globalOffset += 16
}
}
if tableOffset := offsets.TablesBegin; tableOffset >= 0 {
// First, write the address of the first element of typeIDs.
if len(inst.TypeIDs) > 0 {
binary.LittleEndian.PutUint64(opaque[offsets.TypeIDs1stElement:], uint64(uintptr(unsafe.Pointer(&inst.TypeIDs[0]))))
}
// Then we write the table addresses.
for _, table := range inst.Tables {
binary.LittleEndian.PutUint64(opaque[tableOffset:], uint64(uintptr(unsafe.Pointer(table))))
tableOffset += 8
}
}
if beforeListenerOffset := offsets.BeforeListenerTrampolines1stElement; beforeListenerOffset >= 0 {
binary.LittleEndian.PutUint64(opaque[beforeListenerOffset:], uint64(uintptr(unsafe.Pointer(&m.parent.listenerBeforeTrampolines[0]))))
}
if afterListenerOffset := offsets.AfterListenerTrampolines1stElement; afterListenerOffset >= 0 {
binary.LittleEndian.PutUint64(opaque[afterListenerOffset:], uint64(uintptr(unsafe.Pointer(&m.parent.listenerAfterTrampolines[0]))))
}
if len(inst.DataInstances) > 0 {
binary.LittleEndian.PutUint64(opaque[offsets.DataInstances1stElement:], uint64(uintptr(unsafe.Pointer(&inst.DataInstances[0]))))
}
if len(inst.ElementInstances) > 0 {
binary.LittleEndian.PutUint64(opaque[offsets.ElementInstances1stElement:], uint64(uintptr(unsafe.Pointer(&inst.ElementInstances[0]))))
}
}
// NewFunction implements wasm.ModuleEngine.
func (m *moduleEngine) NewFunction(index wasm.Index) api.Function {
if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable {
panic("When PrintMachineCodeHexPerFunctionDisassemblable enabled, functions must not be called")
}
localIndex := index
if importedFnCount := m.module.Source.ImportFunctionCount; index < importedFnCount {
imported := &m.importedFunctions[index]
return imported.me.NewFunction(imported.indexInModule)
} else {
localIndex -= importedFnCount
}
src := m.module.Source
typIndex := src.FunctionSection[localIndex]
typ := src.TypeSection[typIndex]
sizeOfParamResultSlice := typ.ResultNumInUint64
if ps := typ.ParamNumInUint64; ps > sizeOfParamResultSlice {
sizeOfParamResultSlice = ps
}
p := m.parent
offset := p.functionOffsets[localIndex]
ce := &callEngine{
indexInModule: index,
executable: &p.executable[offset],
parent: m,
preambleExecutable: &m.parent.entryPreambles[typIndex][0],
sizeOfParamResultSlice: sizeOfParamResultSlice,
requiredParams: typ.ParamNumInUint64,
numberOfResults: typ.ResultNumInUint64,
}
ce.execCtx.memoryGrowTrampolineAddress = &m.parent.sharedFunctions.memoryGrowExecutable[0]
ce.execCtx.stackGrowCallTrampolineAddress = &m.parent.sharedFunctions.stackGrowExecutable[0]
ce.execCtx.checkModuleExitCodeTrampolineAddress = &m.parent.sharedFunctions.checkModuleExitCode[0]
ce.execCtx.tableGrowTrampolineAddress = &m.parent.sharedFunctions.tableGrowExecutable[0]
ce.execCtx.refFuncTrampolineAddress = &m.parent.sharedFunctions.refFuncExecutable[0]
ce.execCtx.memoryWait32TrampolineAddress = &m.parent.sharedFunctions.memoryWait32Executable[0]
ce.execCtx.memoryWait64TrampolineAddress = &m.parent.sharedFunctions.memoryWait64Executable[0]
ce.execCtx.memoryNotifyTrampolineAddress = &m.parent.sharedFunctions.memoryNotifyExecutable[0]
ce.execCtx.memmoveAddress = memmovPtr
ce.init()
return ce
}
// GetGlobalValue implements the same method as documented on wasm.ModuleEngine.
func (m *moduleEngine) GetGlobalValue(i wasm.Index) (lo, hi uint64) {
offset := m.parent.offsets.GlobalInstanceOffset(i)
buf := m.opaque[offset:]
if i < m.module.Source.ImportGlobalCount {
panic("GetGlobalValue should not be called for imported globals")
}
return binary.LittleEndian.Uint64(buf), binary.LittleEndian.Uint64(buf[8:])
}
// SetGlobalValue implements the same method as documented on wasm.ModuleEngine.
func (m *moduleEngine) SetGlobalValue(i wasm.Index, lo, hi uint64) {
offset := m.parent.offsets.GlobalInstanceOffset(i)
buf := m.opaque[offset:]
if i < m.module.Source.ImportGlobalCount {
panic("GetGlobalValue should not be called for imported globals")
}
binary.LittleEndian.PutUint64(buf, lo)
binary.LittleEndian.PutUint64(buf[8:], hi)
}
// OwnsGlobals implements the same method as documented on wasm.ModuleEngine.
func (m *moduleEngine) OwnsGlobals() bool { return true }
// ResolveImportedFunction implements wasm.ModuleEngine.
func (m *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) {
executableOffset, moduleCtxOffset, typeIDOffset := m.parent.offsets.ImportedFunctionOffset(index)
importedME := importedModuleEngine.(*moduleEngine)
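// The target may itself be an import of the imported module (a re-export). In that case,
// follow the chain until we reach the module that actually defines the function; otherwise,
// convert the index into the imported module's local function index.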
if int(indexInImportedModule) >= len(importedME.importedFunctions) {
indexInImportedModule -= wasm.Index(len(importedME.importedFunctions))
} else {
imported := &importedME.importedFunctions[indexInImportedModule]
m.ResolveImportedFunction(index, imported.indexInModule, imported.me)
return // Recursively resolve the imported function.
}
offset := importedME.parent.functionOffsets[indexInImportedModule]
typeID := getTypeIDOf(indexInImportedModule, importedME.module)
executable := &importedME.parent.executable[offset]
// Write functionInstance.
binary.LittleEndian.PutUint64(m.opaque[executableOffset:], uint64(uintptr(unsafe.Pointer(executable))))
binary.LittleEndian.PutUint64(m.opaque[moduleCtxOffset:], uint64(uintptr(unsafe.Pointer(importedME.opaquePtr))))
binary.LittleEndian.PutUint64(m.opaque[typeIDOffset:], uint64(typeID))
// Write importedFunction so that it can be used by NewFunction.
m.importedFunctions[index] = importedFunction{me: importedME, indexInModule: indexInImportedModule}
}
func getTypeIDOf(funcIndex wasm.Index, m *wasm.ModuleInstance) wasm.FunctionTypeID {
source := m.Source
var typeIndex wasm.Index
if funcIndex >= source.ImportFunctionCount {
funcIndex -= source.ImportFunctionCount
typeIndex = source.FunctionSection[funcIndex]
} else {
var cnt wasm.Index
for i := range source.ImportSection {
if source.ImportSection[i].Type == wasm.ExternTypeFunc {
if cnt == funcIndex {
typeIndex = source.ImportSection[i].DescFunc
break
}
cnt++
}
}
}
return m.TypeIDs[typeIndex]
}
// ResolveImportedMemory implements wasm.ModuleEngine.
func (m *moduleEngine) ResolveImportedMemory(importedModuleEngine wasm.ModuleEngine) {
importedME := importedModuleEngine.(*moduleEngine)
inst := importedME.module
var memInstPtr uint64
var memOwnerOpaquePtr uint64
if offs := importedME.parent.offsets; offs.ImportedMemoryBegin >= 0 {
offset := offs.ImportedMemoryBegin
memInstPtr = binary.LittleEndian.Uint64(importedME.opaque[offset:])
memOwnerOpaquePtr = binary.LittleEndian.Uint64(importedME.opaque[offset+8:])
} else {
memInstPtr = uint64(uintptr(unsafe.Pointer(inst.MemoryInstance)))
memOwnerOpaquePtr = uint64(uintptr(unsafe.Pointer(importedME.opaquePtr)))
}
offset := m.parent.offsets.ImportedMemoryBegin
binary.LittleEndian.PutUint64(m.opaque[offset:], memInstPtr)
binary.LittleEndian.PutUint64(m.opaque[offset+8:], memOwnerOpaquePtr)
}
// DoneInstantiation implements wasm.ModuleEngine.
func (m *moduleEngine) DoneInstantiation() {
if !m.module.Source.IsHostModule {
m.setupOpaque()
}
}
// FunctionInstanceReference implements wasm.ModuleEngine.
func (m *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference {
if funcIndex < m.module.Source.ImportFunctionCount {
begin, _, _ := m.parent.offsets.ImportedFunctionOffset(funcIndex)
return uintptr(unsafe.Pointer(&m.opaque[begin]))
}
localIndex := funcIndex - m.module.Source.ImportFunctionCount
p := m.parent
executable := &p.executable[p.functionOffsets[localIndex]]
typeID := m.module.TypeIDs[m.module.Source.FunctionSection[localIndex]]
lf := &functionInstance{
executable: executable,
moduleContextOpaquePtr: m.opaquePtr,
typeID: typeID,
indexInModule: funcIndex,
}
m.localFunctionInstances = append(m.localFunctionInstances, lf)
return uintptr(unsafe.Pointer(lf))
}
// LookupFunction implements wasm.ModuleEngine.
func (m *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (*wasm.ModuleInstance, wasm.Index) {
if tableOffset >= uint32(len(t.References)) || t.Type != wasm.RefTypeFuncref {
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
}
rawPtr := t.References[tableOffset]
if rawPtr == 0 {
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
}
tf := wazevoapi.PtrFromUintptr[functionInstance](rawPtr)
if tf.typeID != typeId {
panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
}
return moduleInstanceFromOpaquePtr(tf.moduleContextOpaquePtr), tf.indexInModule
}
func moduleInstanceFromOpaquePtr(ptr *byte) *wasm.ModuleInstance {
return *(**wasm.ModuleInstance)(unsafe.Pointer(ptr))
}


@ -0,0 +1,11 @@
//go:build !tinygo
package wazevo
import "reflect"
// setSliceLimits sets both Cap and Len for the given reflected slice.
func setSliceLimits(s *reflect.SliceHeader, l, c uintptr) {
s.Len = int(l)
s.Cap = int(c)
}


@ -0,0 +1,11 @@
//go:build tinygo
package wazevo
import "reflect"
// setSliceLimits sets both Cap and Len for the given reflected slice.
func setSliceLimits(s *reflect.SliceHeader, l, c uintptr) {
s.Len = l
s.Cap = c
}


@ -0,0 +1,407 @@
package ssa
import (
"fmt"
"strconv"
"strings"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
// BasicBlock represents the Basic Block of an SSA function.
// Each BasicBlock always ends with branching instructions (e.g. Branch, Return, etc.),
// and at most two branches are allowed. If there's two branches, these two are placed together at the end of the block.
// In other words, there's no branching instruction in the middle of the block.
//
// Note: we use the "block argument" variant of SSA, instead of PHI functions. See the package level doc comments.
//
// Note: we use "parameter/param" as a placeholder which represents a variant of PHI, and "argument/arg" as an actual
// Value passed to that "parameter/param".
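// For illustration (editorial note): where classic SSA would merge two definitions with a
// PHI node such as
//
//	blk2: v3 = phi [v1, blk0], [v2, blk1]
//
// this representation instead declares a parameter on the join block and passes the value
// as a branch argument:
//
//	blk0: ... Jump blk2(v1)
//	blk1: ... Jump blk2(v2)
//	blk2(v3:i32): ...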
type BasicBlock interface {
// ID returns the unique ID of this block.
ID() BasicBlockID
// Name returns the unique string ID of this block. e.g. blk0, blk1, ...
Name() string
// AddParam adds the parameter to the block whose type specified by `t`.
AddParam(b Builder, t Type) Value
// Params returns the number of parameters to this block.
Params() int
// Param returns the Value that corresponds to the i-th parameter of this block.
// The returned Value is the definition of the param in this block.
Param(i int) Value
// InsertInstruction inserts an instruction that implements Value into the tail of this block.
InsertInstruction(raw *Instruction)
// Root returns the root instruction of this block.
Root() *Instruction
// Tail returns the tail instruction of this block.
Tail() *Instruction
// EntryBlock returns true if this block represents the function entry.
EntryBlock() bool
// ReturnBlock returns true if this block represents the function return.
ReturnBlock() bool
// FormatHeader returns the debug string of this block, not including instructions.
FormatHeader(b Builder) string
// Valid is true if this block is still valid even after optimizations.
Valid() bool
// Sealed is true if this block has been sealed.
Sealed() bool
// BeginPredIterator returns the first predecessor of this block.
BeginPredIterator() BasicBlock
// NextPredIterator returns the next predecessor of this block.
NextPredIterator() BasicBlock
// Preds returns the number of predecessors of this block.
Preds() int
// Pred returns the i-th predecessor of this block.
Pred(i int) BasicBlock
// Succs returns the number of successors of this block.
Succs() int
// Succ returns the i-th successor of this block.
Succ(i int) BasicBlock
// LoopHeader returns true if this block is a loop header.
LoopHeader() bool
// LoopNestingForestChildren returns the children of this block in the loop nesting forest.
LoopNestingForestChildren() []BasicBlock
}
type (
// basicBlock is a basic block in an SSA-transformed function.
basicBlock struct {
id BasicBlockID
rootInstr, currentInstr *Instruction
params []blockParam
predIter int
preds []basicBlockPredecessorInfo
success []*basicBlock
// singlePred is the alias to preds[0] for fast lookup, and only set after Seal is called.
singlePred *basicBlock
// lastDefinitions maps Variable to its last definition in this block.
lastDefinitions map[Variable]Value
// unknownValues are used in builder.findValue. The usage is well-described in the paper.
unknownValues []unknownValue
// invalid is true if this block is made invalid during optimizations.
invalid bool
// sealed is true if this is sealed (all the predecessors are known).
sealed bool
// loopHeader is true if this block is a loop header:
//
// > A loop header (sometimes called the entry point of the loop) is a dominator that is the target
// > of a loop-forming back edge. The loop header dominates all blocks in the loop body.
// > A block may be a loop header for more than one loop. A loop may have multiple entry points,
// > in which case it has no "loop header".
//
// See https://en.wikipedia.org/wiki/Control-flow_graph for more details.
//
// This is modified during the subPassLoopDetection pass.
loopHeader bool
// loopNestingForestChildren holds the children of this block in the loop nesting forest.
// Non-empty if and only if this block is a loop header (i.e. loopHeader=true)
loopNestingForestChildren []BasicBlock
// reversePostOrder is used to sort all the blocks in the function in reverse post order.
// This is used in builder.LayoutBlocks.
reversePostOrder int
// child and sibling are the ones in the dominator tree.
child, sibling *basicBlock
}
// BasicBlockID is the unique ID of a basicBlock.
BasicBlockID uint32
// blockParam implements Value and represents a parameter to a basicBlock.
blockParam struct {
// value is the Value that corresponds to the parameter in this block,
// and can be considered as an output of PHI instruction in traditional SSA.
value Value
// typ is the type of the parameter.
typ Type
}
unknownValue struct {
// variable is the variable that this unknownValue represents.
variable Variable
// value is the value that this unknownValue represents.
value Value
}
)
const basicBlockIDReturnBlock = 0xffffffff
// Name implements BasicBlock.Name.
func (bb *basicBlock) Name() string {
if bb.id == basicBlockIDReturnBlock {
return "blk_ret"
} else {
return fmt.Sprintf("blk%d", bb.id)
}
}
// String implements fmt.Stringer for debugging.
func (bid BasicBlockID) String() string {
if bid == basicBlockIDReturnBlock {
return "blk_ret"
} else {
return fmt.Sprintf("blk%d", bid)
}
}
// ID implements BasicBlock.ID.
func (bb *basicBlock) ID() BasicBlockID {
return bb.id
}
// basicBlockPredecessorInfo is the information of a predecessor of a basicBlock.
// A predecessor is identified by the predecessor block together with the branch instruction in that block used to jump to this one.
type basicBlockPredecessorInfo struct {
blk *basicBlock
branch *Instruction
}
// EntryBlock implements BasicBlock.EntryBlock.
func (bb *basicBlock) EntryBlock() bool {
return bb.id == 0
}
// ReturnBlock implements BasicBlock.ReturnBlock.
func (bb *basicBlock) ReturnBlock() bool {
return bb.id == basicBlockIDReturnBlock
}
// AddParam implements BasicBlock.AddParam.
func (bb *basicBlock) AddParam(b Builder, typ Type) Value {
paramValue := b.allocateValue(typ)
bb.params = append(bb.params, blockParam{typ: typ, value: paramValue})
return paramValue
}
// addParamOn adds a parameter to this block whose value is already allocated.
func (bb *basicBlock) addParamOn(typ Type, value Value) {
bb.params = append(bb.params, blockParam{typ: typ, value: value})
}
// Params implements BasicBlock.Params.
func (bb *basicBlock) Params() int {
return len(bb.params)
}
// Param implements BasicBlock.Param.
func (bb *basicBlock) Param(i int) Value {
p := &bb.params[i]
return p.value
}
// Valid implements BasicBlock.Valid.
func (bb *basicBlock) Valid() bool {
return !bb.invalid
}
// Sealed implements BasicBlock.Sealed.
func (bb *basicBlock) Sealed() bool {
return bb.sealed
}
// InsertInstruction implements BasicBlock.InsertInstruction.
func (bb *basicBlock) InsertInstruction(next *Instruction) {
current := bb.currentInstr
if current != nil {
current.next = next
next.prev = current
} else {
bb.rootInstr = next
}
bb.currentInstr = next
switch next.opcode {
case OpcodeJump, OpcodeBrz, OpcodeBrnz:
target := next.blk.(*basicBlock)
target.addPred(bb, next)
case OpcodeBrTable:
for _, _target := range next.targets {
target := _target.(*basicBlock)
target.addPred(bb, next)
}
}
}
// NumPreds implements BasicBlock.NumPreds.
func (bb *basicBlock) NumPreds() int {
return len(bb.preds)
}
// BeginPredIterator implements BasicBlock.BeginPredIterator.
func (bb *basicBlock) BeginPredIterator() BasicBlock {
bb.predIter = 0
return bb.NextPredIterator()
}
// NextPredIterator implements BasicBlock.NextPredIterator.
func (bb *basicBlock) NextPredIterator() BasicBlock {
if bb.predIter >= len(bb.preds) {
return nil
}
pred := bb.preds[bb.predIter].blk
bb.predIter++
return pred
}
// Preds implements BasicBlock.Preds.
func (bb *basicBlock) Preds() int {
return len(bb.preds)
}
// Pred implements BasicBlock.Pred.
func (bb *basicBlock) Pred(i int) BasicBlock {
return bb.preds[i].blk
}
// Succs implements BasicBlock.Succs.
func (bb *basicBlock) Succs() int {
return len(bb.success)
}
// Succ implements BasicBlock.Succ.
func (bb *basicBlock) Succ(i int) BasicBlock {
return bb.success[i]
}
// Root implements BasicBlock.Root.
func (bb *basicBlock) Root() *Instruction {
return bb.rootInstr
}
// Tail implements BasicBlock.Tail.
func (bb *basicBlock) Tail() *Instruction {
return bb.currentInstr
}
// resetBasicBlock resets the given basicBlock to its initial state so that it can be reused for another function.
func resetBasicBlock(bb *basicBlock) {
bb.params = bb.params[:0]
bb.rootInstr, bb.currentInstr = nil, nil
bb.preds = bb.preds[:0]
bb.success = bb.success[:0]
bb.invalid, bb.sealed = false, false
bb.singlePred = nil
bb.unknownValues = bb.unknownValues[:0]
bb.lastDefinitions = wazevoapi.ResetMap(bb.lastDefinitions)
bb.reversePostOrder = -1
bb.loopNestingForestChildren = bb.loopNestingForestChildren[:0]
bb.loopHeader = false
bb.sibling = nil
bb.child = nil
}
// addPred adds a predecessor to this block specified by the branch instruction.
func (bb *basicBlock) addPred(blk BasicBlock, branch *Instruction) {
if bb.sealed {
panic("BUG: trying to add predecessor to a sealed block: " + bb.Name())
}
pred := blk.(*basicBlock)
for i := range bb.preds {
existingPred := &bb.preds[i]
if existingPred.blk == pred && existingPred.branch != branch {
// If the target is already added, then this must come from the same BrTable,
// otherwise such a redundant branch should have been eliminated by the frontend (which is the simpler place to do it).
panic(fmt.Sprintf("BUG: redundant non-BrTable jumps in %s whose targets are the same", bb.Name()))
}
}
bb.preds = append(bb.preds, basicBlockPredecessorInfo{
blk: pred,
branch: branch,
})
pred.success = append(pred.success, bb)
}
// FormatHeader implements BasicBlock.FormatHeader.
func (bb *basicBlock) FormatHeader(b Builder) string {
ps := make([]string, len(bb.params))
for i, p := range bb.params {
ps[i] = p.value.formatWithType(b)
}
if len(bb.preds) > 0 {
preds := make([]string, 0, len(bb.preds))
for _, pred := range bb.preds {
if pred.blk.invalid {
continue
}
preds = append(preds, fmt.Sprintf("blk%d", pred.blk.id))
}
return fmt.Sprintf("blk%d: (%s) <-- (%s)",
bb.id, strings.Join(ps, ","), strings.Join(preds, ","))
} else {
return fmt.Sprintf("blk%d: (%s)", bb.id, strings.Join(ps, ", "))
}
}
// validate validates the basicBlock for debugging purposes.
func (bb *basicBlock) validate(b *builder) {
if bb.invalid {
panic("BUG: trying to validate an invalid block: " + bb.Name())
}
if len(bb.preds) > 0 {
for _, pred := range bb.preds {
if pred.branch.opcode != OpcodeBrTable {
if target := pred.branch.blk; target != bb {
panic(fmt.Sprintf("BUG: '%s' is not branch to %s, but to %s",
pred.branch.Format(b), bb.Name(), target.Name()))
}
}
var exp int
if bb.ReturnBlock() {
exp = len(b.currentSignature.Results)
} else {
exp = len(bb.params)
}
if len(pred.branch.vs.View()) != exp {
panic(fmt.Sprintf(
"BUG: len(argument at %s) != len(params at %s): %d != %d: %s",
pred.blk.Name(), bb.Name(),
len(pred.branch.vs.View()), len(bb.params), pred.branch.Format(b),
))
}
}
}
}
// String implements fmt.Stringer for debugging purposes only.
func (bb *basicBlock) String() string {
return strconv.Itoa(int(bb.id))
}
// LoopNestingForestChildren implements BasicBlock.LoopNestingForestChildren.
func (bb *basicBlock) LoopNestingForestChildren() []BasicBlock {
return bb.loopNestingForestChildren
}
// LoopHeader implements BasicBlock.LoopHeader.
func (bb *basicBlock) LoopHeader() bool {
return bb.loopHeader
}


@ -0,0 +1,34 @@
//go:build go1.21
package ssa
import (
"slices"
)
func sortBlocks(blocks []*basicBlock) {
slices.SortFunc(blocks, func(i, j *basicBlock) int {
jIsReturn := j.ReturnBlock()
iIsReturn := i.ReturnBlock()
if iIsReturn && jIsReturn {
return 0
}
if jIsReturn {
return 1
}
if iIsReturn {
return -1
}
iRoot, jRoot := i.rootInstr, j.rootInstr
if iRoot == nil && jRoot == nil { // For testing.
return 0
}
if jRoot == nil {
return 1
}
if iRoot == nil {
return -1
}
return i.rootInstr.id - j.rootInstr.id
})
}


@ -0,0 +1,24 @@
//go:build !go1.21
// TODO: delete after the floor Go version is 1.21
package ssa
import "sort"
func sortBlocks(blocks []*basicBlock) {
sort.SliceStable(blocks, func(i, j int) bool {
iBlk, jBlk := blocks[i], blocks[j]
if jBlk.ReturnBlock() {
return true
}
if iBlk.ReturnBlock() {
return false
}
iRoot, jRoot := iBlk.rootInstr, jBlk.rootInstr
if iRoot == nil || jRoot == nil { // For testing.
return true
}
return iBlk.rootInstr.id < jBlk.rootInstr.id
})
}


@ -0,0 +1,731 @@
package ssa
import (
"fmt"
"sort"
"strings"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
// Builder is used to build SSA consisting of BasicBlocks per function.
type Builder interface {
// Init must be called to reuse this builder for the next function.
Init(typ *Signature)
// Signature returns the Signature of the currently-compiled function.
Signature() *Signature
// BlockIDMax returns the maximum value of BasicBlockID existing in the currently-compiled function.
BlockIDMax() BasicBlockID
// AllocateBasicBlock creates a basic block in SSA function.
AllocateBasicBlock() BasicBlock
// CurrentBlock returns the currently handled BasicBlock which is set by the latest call to SetCurrentBlock.
CurrentBlock() BasicBlock
// EntryBlock returns the entry BasicBlock of the currently-compiled function.
EntryBlock() BasicBlock
// SetCurrentBlock sets the instruction insertion target to the BasicBlock `b`.
SetCurrentBlock(b BasicBlock)
// DeclareVariable declares a Variable of the given Type.
DeclareVariable(Type) Variable
// DefineVariable defines a variable in the `block` with value.
// The defining instruction will be inserted into the `block`.
DefineVariable(variable Variable, value Value, block BasicBlock)
// DefineVariableInCurrentBB is the same as DefineVariable except the definition is
// inserted into the current BasicBlock. Alias to DefineVariable(x, y, CurrentBlock()).
DefineVariableInCurrentBB(variable Variable, value Value)
// AllocateInstruction returns a new Instruction.
AllocateInstruction() *Instruction
// InsertInstruction executes BasicBlock.InsertInstruction for the currently handled basic block.
InsertInstruction(raw *Instruction)
// allocateValue allocates an unused Value.
allocateValue(typ Type) Value
// MustFindValue searches the latest definition of the given Variable and returns the result.
MustFindValue(variable Variable) Value
// MustFindValueInBlk is the same as MustFindValue except it searches the latest definition from the given BasicBlock.
MustFindValueInBlk(variable Variable, blk BasicBlock) Value
// FindValueInLinearPath tries to find the latest definition of the given Variable in the linear path to the current BasicBlock.
// If it cannot find the definition, or the block is not sealed yet, it returns ValueInvalid.
FindValueInLinearPath(variable Variable) Value
// Seal declares that all the predecessors of this block are known and have been added via AddPred.
// After calling this, AddPred will be forbidden.
Seal(blk BasicBlock)
// AnnotateValue is for debugging purpose.
AnnotateValue(value Value, annotation string)
// DeclareSignature appends the *Signature to be referenced by various instructions (e.g. OpcodeCall).
DeclareSignature(signature *Signature)
// Signatures returns the slice of declared Signatures.
Signatures() []*Signature
// ResolveSignature returns the Signature which corresponds to SignatureID.
ResolveSignature(id SignatureID) *Signature
// RunPasses runs various passes on the constructed SSA function.
RunPasses()
// Format returns the debugging string of the SSA function.
Format() string
// BlockIteratorBegin initializes the state to iterate over all the valid BasicBlock(s) compiled.
// Combined with BlockIteratorNext, we can use this like:
//
// for blk := builder.BlockIteratorBegin(); blk != nil; blk = builder.BlockIteratorNext() {
// // ...
// }
//
// The returned blocks are ordered in the order of AllocateBasicBlock being called.
BlockIteratorBegin() BasicBlock
// BlockIteratorNext advances the state for iteration initialized by BlockIteratorBegin.
// Returns nil if there's no unseen BasicBlock.
BlockIteratorNext() BasicBlock
// ValueRefCounts returns the reference count of each Value, indexed by ValueID.
// The returned slice must not be modified.
ValueRefCounts() []int
// BlockIteratorReversePostOrderBegin is almost the same as BlockIteratorBegin except it returns the BasicBlock in the reverse post-order.
// This is available after RunPasses is run.
BlockIteratorReversePostOrderBegin() BasicBlock
// BlockIteratorReversePostOrderNext is almost the same as BlockIteratorNext except it returns the BasicBlock in the reverse post-order.
// This is available after RunPasses is run.
BlockIteratorReversePostOrderNext() BasicBlock
// ReturnBlock returns the BasicBlock which is used to return from the function.
ReturnBlock() BasicBlock
// InsertUndefined inserts an undefined instruction at the current position.
InsertUndefined()
// SetCurrentSourceOffset sets the current source offset. The incoming instruction will be annotated with this offset.
SetCurrentSourceOffset(line SourceOffset)
// LoopNestingForestRoots returns the roots of the loop nesting forest.
LoopNestingForestRoots() []BasicBlock
// LowestCommonAncestor returns the lowest common ancestor in the dominator tree of the given BasicBlock(s).
LowestCommonAncestor(blk1, blk2 BasicBlock) BasicBlock
// Idom returns the immediate dominator of the given BasicBlock.
Idom(blk BasicBlock) BasicBlock
VarLengthPool() *wazevoapi.VarLengthPool[Value]
}
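// Example (editorial sketch): typical Builder usage when lowering a single function --
//
//	b := NewBuilder()
//	b.Init(sig)                     // sig is the function's *Signature.
//	entry := b.AllocateBasicBlock()
//	b.SetCurrentBlock(entry)
//	param := entry.AddParam(b, TypeI32)
//	// ... allocate/insert instructions, declare and define variables ...
//	b.Seal(entry)
//	b.RunPasses()
//	_ = b.Format()                  // debugging output of the final SSA.
//
// The variable names above are illustrative only.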
// NewBuilder returns a new Builder implementation.
func NewBuilder() Builder {
return &builder{
instructionsPool: wazevoapi.NewPool[Instruction](resetInstruction),
basicBlocksPool: wazevoapi.NewPool[basicBlock](resetBasicBlock),
varLengthPool: wazevoapi.NewVarLengthPool[Value](),
valueAnnotations: make(map[ValueID]string),
signatures: make(map[SignatureID]*Signature),
blkVisited: make(map[*basicBlock]int),
valueIDAliases: make(map[ValueID]Value),
redundantParameterIndexToValue: make(map[int]Value),
returnBlk: &basicBlock{id: basicBlockIDReturnBlock},
}
}
// builder implements Builder interface.
type builder struct {
basicBlocksPool wazevoapi.Pool[basicBlock]
instructionsPool wazevoapi.Pool[Instruction]
varLengthPool wazevoapi.VarLengthPool[Value]
signatures map[SignatureID]*Signature
currentSignature *Signature
// reversePostOrderedBasicBlocks are the BasicBlock(s) ordered in the reverse post-order after passCalculateImmediateDominators.
reversePostOrderedBasicBlocks []*basicBlock
currentBB *basicBlock
returnBlk *basicBlock
// variables tracks the type of each Variable, indexed by the Variable itself.
variables []Type
// nextValueID is used by builder.allocateValue.
nextValueID ValueID
// nextVariable is used by builder.allocateVariable.
nextVariable Variable
valueIDAliases map[ValueID]Value
valueAnnotations map[ValueID]string
// valueRefCounts is used to lower the SSA in backend, and will be calculated
// by the last SSA-level optimization pass.
valueRefCounts []int
// dominators stores the immediate dominator of each BasicBlock.
// The index is blockID of the BasicBlock.
dominators []*basicBlock
sparseTree dominatorSparseTree
// loopNestingForestRoots are the roots of the loop nesting forest.
loopNestingForestRoots []BasicBlock
// The following fields are used for optimization passes and deterministic compilation.
instStack []*Instruction
blkVisited map[*basicBlock]int
valueIDToInstruction []*Instruction
blkStack []*basicBlock
blkStack2 []*basicBlock
ints []int
redundantParameterIndexToValue map[int]Value
// blockIterCur is used to implement blockIteratorBegin and blockIteratorNext.
blockIterCur int
// donePreBlockLayoutPasses is true if all the passes before LayoutBlocks are called.
donePreBlockLayoutPasses bool
// doneBlockLayout is true if LayoutBlocks is called.
doneBlockLayout bool
// donePostBlockLayoutPasses is true if all the passes after LayoutBlocks are called.
donePostBlockLayoutPasses bool
currentSourceOffset SourceOffset
}
func (b *builder) VarLengthPool() *wazevoapi.VarLengthPool[Value] {
return &b.varLengthPool
}
// ReturnBlock implements Builder.ReturnBlock.
func (b *builder) ReturnBlock() BasicBlock {
return b.returnBlk
}
// Init implements Builder.Init.
func (b *builder) Init(s *Signature) {
b.nextVariable = 0
b.currentSignature = s
resetBasicBlock(b.returnBlk)
b.instructionsPool.Reset()
b.basicBlocksPool.Reset()
b.varLengthPool.Reset()
b.donePreBlockLayoutPasses = false
b.doneBlockLayout = false
b.donePostBlockLayoutPasses = false
for _, sig := range b.signatures {
sig.used = false
}
b.ints = b.ints[:0]
b.blkStack = b.blkStack[:0]
b.blkStack2 = b.blkStack2[:0]
b.dominators = b.dominators[:0]
b.loopNestingForestRoots = b.loopNestingForestRoots[:0]
for i := 0; i < b.basicBlocksPool.Allocated(); i++ {
blk := b.basicBlocksPool.View(i)
delete(b.blkVisited, blk)
}
b.basicBlocksPool.Reset()
for v := ValueID(0); v < b.nextValueID; v++ {
delete(b.valueAnnotations, v)
delete(b.valueIDAliases, v)
b.valueRefCounts[v] = 0
b.valueIDToInstruction[v] = nil
}
b.nextValueID = 0
b.reversePostOrderedBasicBlocks = b.reversePostOrderedBasicBlocks[:0]
b.doneBlockLayout = false
for i := range b.valueRefCounts {
b.valueRefCounts[i] = 0
}
b.currentSourceOffset = sourceOffsetUnknown
}
// Signature implements Builder.Signature.
func (b *builder) Signature() *Signature {
return b.currentSignature
}
// AnnotateValue implements Builder.AnnotateValue.
func (b *builder) AnnotateValue(value Value, a string) {
b.valueAnnotations[value.ID()] = a
}
// AllocateInstruction implements Builder.AllocateInstruction.
func (b *builder) AllocateInstruction() *Instruction {
instr := b.instructionsPool.Allocate()
instr.id = b.instructionsPool.Allocated()
return instr
}
// DeclareSignature implements Builder.DeclareSignature.
func (b *builder) DeclareSignature(s *Signature) {
b.signatures[s.ID] = s
s.used = false
}
// Signatures implements Builder.Signatures.
func (b *builder) Signatures() (ret []*Signature) {
for _, sig := range b.signatures {
ret = append(ret, sig)
}
sort.Slice(ret, func(i, j int) bool {
return ret[i].ID < ret[j].ID
})
return
}
// SetCurrentSourceOffset implements Builder.SetCurrentSourceOffset.
func (b *builder) SetCurrentSourceOffset(l SourceOffset) {
b.currentSourceOffset = l
}
func (b *builder) usedSignatures() (ret []*Signature) {
for _, sig := range b.signatures {
if sig.used {
ret = append(ret, sig)
}
}
sort.Slice(ret, func(i, j int) bool {
return ret[i].ID < ret[j].ID
})
return
}
// ResolveSignature implements Builder.ResolveSignature.
func (b *builder) ResolveSignature(id SignatureID) *Signature {
return b.signatures[id]
}
// AllocateBasicBlock implements Builder.AllocateBasicBlock.
func (b *builder) AllocateBasicBlock() BasicBlock {
return b.allocateBasicBlock()
}
// allocateBasicBlock allocates a new basicBlock.
func (b *builder) allocateBasicBlock() *basicBlock {
id := BasicBlockID(b.basicBlocksPool.Allocated())
blk := b.basicBlocksPool.Allocate()
blk.id = id
return blk
}
// Idom implements Builder.Idom.
func (b *builder) Idom(blk BasicBlock) BasicBlock {
return b.dominators[blk.ID()]
}
// InsertInstruction implements Builder.InsertInstruction.
func (b *builder) InsertInstruction(instr *Instruction) {
b.currentBB.InsertInstruction(instr)
if l := b.currentSourceOffset; l.Valid() {
// Emit the source offset info only when the instruction has a side effect, because
// those are the only instructions accessed by stack unwinding.
// This significantly reduces the amount of offset info in the binary.
if instr.sideEffect() != sideEffectNone {
instr.annotateSourceOffset(l)
}
}
resultTypesFn := instructionReturnTypes[instr.opcode]
if resultTypesFn == nil {
panic("TODO: " + instr.Format(b))
}
t1, ts := resultTypesFn(b, instr)
if t1.invalid() {
return
}
r1 := b.allocateValue(t1)
instr.rValue = r1
tsl := len(ts)
if tsl == 0 {
return
}
rValues := b.varLengthPool.Allocate(tsl)
for i := 0; i < tsl; i++ {
rValues = rValues.Append(&b.varLengthPool, b.allocateValue(ts[i]))
}
instr.rValues = rValues
}
// DefineVariable implements Builder.DefineVariable.
func (b *builder) DefineVariable(variable Variable, value Value, block BasicBlock) {
if b.variables[variable].invalid() {
panic("BUG: trying to define variable " + variable.String() + " but is not declared yet")
}
if b.variables[variable] != value.Type() {
panic(fmt.Sprintf("BUG: inconsistent type for variable %d: expected %s but got %s", variable, b.variables[variable], value.Type()))
}
bb := block.(*basicBlock)
bb.lastDefinitions[variable] = value
}
// DefineVariableInCurrentBB implements Builder.DefineVariableInCurrentBB.
func (b *builder) DefineVariableInCurrentBB(variable Variable, value Value) {
b.DefineVariable(variable, value, b.currentBB)
}
// SetCurrentBlock implements Builder.SetCurrentBlock.
func (b *builder) SetCurrentBlock(bb BasicBlock) {
b.currentBB = bb.(*basicBlock)
}
// CurrentBlock implements Builder.CurrentBlock.
func (b *builder) CurrentBlock() BasicBlock {
return b.currentBB
}
// EntryBlock implements Builder.EntryBlock.
func (b *builder) EntryBlock() BasicBlock {
return b.entryBlk()
}
// DeclareVariable implements Builder.DeclareVariable.
func (b *builder) DeclareVariable(typ Type) Variable {
v := b.allocateVariable()
iv := int(v)
if l := len(b.variables); l <= iv {
b.variables = append(b.variables, make([]Type, 2*(l+1))...)
}
b.variables[v] = typ
return v
}
// allocateVariable allocates a new variable.
func (b *builder) allocateVariable() (ret Variable) {
ret = b.nextVariable
b.nextVariable++
return
}
// allocateValue allocates a new Value with the given Type.
func (b *builder) allocateValue(typ Type) (v Value) {
v = Value(b.nextValueID)
v = v.setType(typ)
b.nextValueID++
return
}
// FindValueInLinearPath implements Builder.FindValueInLinearPath.
func (b *builder) FindValueInLinearPath(variable Variable) Value {
return b.findValueInLinearPath(variable, b.currentBB)
}
func (b *builder) findValueInLinearPath(variable Variable, blk *basicBlock) Value {
if val, ok := blk.lastDefinitions[variable]; ok {
return val
} else if !blk.sealed {
return ValueInvalid
}
if pred := blk.singlePred; pred != nil {
		// If this block is sealed and has only one predecessor,
		// we can use the value in that block without ambiguity on definition.
return b.findValueInLinearPath(variable, pred)
}
if len(blk.preds) == 1 {
panic("BUG")
}
return ValueInvalid
}
func (b *builder) MustFindValueInBlk(variable Variable, blk BasicBlock) Value {
typ := b.definedVariableType(variable)
return b.findValue(typ, variable, blk.(*basicBlock))
}
// MustFindValue implements Builder.MustFindValue.
func (b *builder) MustFindValue(variable Variable) Value {
typ := b.definedVariableType(variable)
return b.findValue(typ, variable, b.currentBB)
}
// findValue recursively tries to find the latest definition of a `variable`. The algorithm is described in
// section 2 of the paper https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf.
//
// TODO: reimplement this iteratively, not recursively, to avoid stack overflow.
func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value {
if val, ok := blk.lastDefinitions[variable]; ok {
// The value is already defined in this block!
return val
} else if !blk.sealed { // Incomplete CFG as in the paper.
		// If this is not sealed, that means it might have additional unknown predecessors later on.
		// So we temporarily define the placeholder value here (not adding it as a parameter yet!),
		// and record it as unknown.
		// The unknown values are resolved when we seal this block via BasicBlock.Seal().
value := b.allocateValue(typ)
if wazevoapi.SSALoggingEnabled {
fmt.Printf("adding unknown value placeholder for %s at %d\n", variable, blk.id)
}
blk.lastDefinitions[variable] = value
blk.unknownValues = append(blk.unknownValues, unknownValue{
variable: variable,
value: value,
})
return value
}
if pred := blk.singlePred; pred != nil {
		// If this block is sealed and has only one predecessor,
		// we can use the value in that block without ambiguity on definition.
return b.findValue(typ, variable, pred)
} else if len(blk.preds) == 0 {
panic("BUG: value is not defined for " + variable.String())
}
// If this block has multiple predecessors, we have to gather the definitions,
// and treat them as an argument to this block.
//
// The first thing is to define a new parameter to this block which may or may not be redundant, but
// later we eliminate trivial params in an optimization pass. This must be done before finding the
// definitions in the predecessors so that we can break the cycle.
paramValue := blk.AddParam(b, typ)
b.DefineVariable(variable, paramValue, blk)
// After the new param is added, we have to manipulate the original branching instructions
// in predecessors so that they would pass the definition of `variable` as the argument to
// the newly added PHI.
for i := range blk.preds {
pred := &blk.preds[i]
value := b.findValue(typ, variable, pred.blk)
pred.branch.addArgumentBranchInst(b, value)
}
return paramValue
}
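// A minimal sketch (assuming a frontend drives this Builder; NewBuilder and the constant setter
// below are assumed and not shown in this file) of how the resolution above is exercised:
//
//	b := NewBuilder()
//	b.Init(sig)                          // sig is some *Signature
//	entry := b.AllocateBasicBlock()
//	b.SetCurrentBlock(entry)
//	v := b.DeclareVariable(TypeI32)
//	iconst := b.AllocateInstruction()    // configured as an i32 constant via its setter (not shown)
//	b.InsertInstruction(iconst)
//	b.DefineVariableInCurrentBB(v, iconst.Return())
//	got := b.MustFindValue(v)            // resolved within the same block: got == iconst.Return()
//	b.Seal(entry)                        // sealing resolves any placeholder params recorded above
//
// In a block with multiple predecessors, the same MustFindValue call instead adds a block
// parameter (PHI) via findValue and pulls the definitions from the predecessors recursively.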
// Seal implements Builder.Seal.
func (b *builder) Seal(raw BasicBlock) {
blk := raw.(*basicBlock)
if len(blk.preds) == 1 {
blk.singlePred = blk.preds[0].blk
}
blk.sealed = true
for _, v := range blk.unknownValues {
variable, phiValue := v.variable, v.value
typ := b.definedVariableType(variable)
blk.addParamOn(typ, phiValue)
for i := range blk.preds {
pred := &blk.preds[i]
predValue := b.findValue(typ, variable, pred.blk)
if !predValue.Valid() {
panic("BUG: value is not defined anywhere in the predecessors in the CFG")
}
pred.branch.addArgumentBranchInst(b, predValue)
}
}
}
// definedVariableType returns the type of the given variable. If the variable is not defined yet, it panics.
func (b *builder) definedVariableType(variable Variable) Type {
typ := b.variables[variable]
if typ.invalid() {
panic(fmt.Sprintf("%s is not defined yet", variable))
}
return typ
}
// Format implements Builder.Format.
func (b *builder) Format() string {
str := strings.Builder{}
usedSigs := b.usedSignatures()
if len(usedSigs) > 0 {
str.WriteByte('\n')
str.WriteString("signatures:\n")
for _, sig := range usedSigs {
str.WriteByte('\t')
str.WriteString(sig.String())
str.WriteByte('\n')
}
}
var iterBegin, iterNext func() *basicBlock
if b.doneBlockLayout {
iterBegin, iterNext = b.blockIteratorReversePostOrderBegin, b.blockIteratorReversePostOrderNext
} else {
iterBegin, iterNext = b.blockIteratorBegin, b.blockIteratorNext
}
for bb := iterBegin(); bb != nil; bb = iterNext() {
str.WriteByte('\n')
str.WriteString(bb.FormatHeader(b))
str.WriteByte('\n')
for cur := bb.Root(); cur != nil; cur = cur.Next() {
str.WriteByte('\t')
str.WriteString(cur.Format(b))
str.WriteByte('\n')
}
}
return str.String()
}
// BlockIteratorNext implements Builder.BlockIteratorNext.
func (b *builder) BlockIteratorNext() BasicBlock {
if blk := b.blockIteratorNext(); blk == nil {
return nil // BasicBlock((*basicBlock)(nil)) != BasicBlock(nil)
} else {
return blk
}
}
// blockIteratorNext is the unexported implementation behind BlockIteratorNext, returning the concrete *basicBlock.
func (b *builder) blockIteratorNext() *basicBlock {
index := b.blockIterCur
for {
if index == b.basicBlocksPool.Allocated() {
return nil
}
ret := b.basicBlocksPool.View(index)
index++
if !ret.invalid {
b.blockIterCur = index
return ret
}
}
}
// BlockIteratorBegin implements Builder.BlockIteratorBegin.
func (b *builder) BlockIteratorBegin() BasicBlock {
return b.blockIteratorBegin()
}
// blockIteratorBegin is the unexported implementation behind BlockIteratorBegin.
func (b *builder) blockIteratorBegin() *basicBlock {
b.blockIterCur = 0
return b.blockIteratorNext()
}
// BlockIteratorReversePostOrderBegin implements Builder.BlockIteratorReversePostOrderBegin.
func (b *builder) BlockIteratorReversePostOrderBegin() BasicBlock {
return b.blockIteratorReversePostOrderBegin()
}
// blockIteratorReversePostOrderBegin is the unexported implementation behind BlockIteratorReversePostOrderBegin.
func (b *builder) blockIteratorReversePostOrderBegin() *basicBlock {
b.blockIterCur = 0
return b.blockIteratorReversePostOrderNext()
}
// BlockIteratorReversePostOrderNext implements Builder.BlockIteratorReversePostOrderNext.
func (b *builder) BlockIteratorReversePostOrderNext() BasicBlock {
if blk := b.blockIteratorReversePostOrderNext(); blk == nil {
return nil // BasicBlock((*basicBlock)(nil)) != BasicBlock(nil)
} else {
return blk
}
}
// blockIteratorReversePostOrderNext is the unexported implementation behind BlockIteratorReversePostOrderNext.
func (b *builder) blockIteratorReversePostOrderNext() *basicBlock {
if b.blockIterCur >= len(b.reversePostOrderedBasicBlocks) {
return nil
} else {
ret := b.reversePostOrderedBasicBlocks[b.blockIterCur]
b.blockIterCur++
return ret
}
}
// ValueRefCounts implements Builder.ValueRefCounts.
func (b *builder) ValueRefCounts() []int {
return b.valueRefCounts
}
// alias records that dst is an alias of src. The alias will be
// resolved in the optimization passes via resolveArgumentAlias.
func (b *builder) alias(dst, src Value) {
b.valueIDAliases[dst.ID()] = src
}
// resolveArgumentAlias resolves the alias of the arguments of the given instruction.
func (b *builder) resolveArgumentAlias(instr *Instruction) {
if instr.v.Valid() {
instr.v = b.resolveAlias(instr.v)
}
if instr.v2.Valid() {
instr.v2 = b.resolveAlias(instr.v2)
}
if instr.v3.Valid() {
instr.v3 = b.resolveAlias(instr.v3)
}
view := instr.vs.View()
for i, v := range view {
view[i] = b.resolveAlias(v)
}
}
// resolveAlias resolves the alias of the given value.
func (b *builder) resolveAlias(v Value) Value {
	// Some aliases are chained, so we follow the chain until we reach a non-aliased value.
for {
if src, ok := b.valueIDAliases[v.ID()]; ok {
v = src
} else {
break
}
}
return v
}
// entryBlk returns the entry block of the function.
func (b *builder) entryBlk() *basicBlock {
return b.basicBlocksPool.View(0)
}
// isDominatedBy returns true if the given block `n` is dominated by the given block `d`.
// Before calling this, passCalculateImmediateDominators must have been run.
func (b *builder) isDominatedBy(n *basicBlock, d *basicBlock) bool {
if len(b.dominators) == 0 {
panic("BUG: passCalculateImmediateDominators must be called before calling isDominatedBy")
}
ent := b.entryBlk()
doms := b.dominators
for n != d && n != ent {
n = doms[n.id]
}
return n == d
}
// BlockIDMax implements Builder.BlockIDMax.
func (b *builder) BlockIDMax() BasicBlockID {
return BasicBlockID(b.basicBlocksPool.Allocated())
}
// InsertUndefined implements Builder.InsertUndefined.
func (b *builder) InsertUndefined() {
instr := b.AllocateInstruction()
instr.opcode = OpcodeUndefined
b.InsertInstruction(instr)
}
// LoopNestingForestRoots implements Builder.LoopNestingForestRoots.
func (b *builder) LoopNestingForestRoots() []BasicBlock {
return b.loopNestingForestRoots
}
// LowestCommonAncestor implements Builder.LowestCommonAncestor.
func (b *builder) LowestCommonAncestor(blk1, blk2 BasicBlock) BasicBlock {
return b.sparseTree.findLCA(blk1.ID(), blk2.ID())
}

View File

@ -0,0 +1,107 @@
package ssa
// IntegerCmpCond represents a condition for integer comparison.
type IntegerCmpCond byte
const (
// IntegerCmpCondInvalid represents an invalid condition.
IntegerCmpCondInvalid IntegerCmpCond = iota
// IntegerCmpCondEqual represents "==".
IntegerCmpCondEqual
// IntegerCmpCondNotEqual represents "!=".
IntegerCmpCondNotEqual
// IntegerCmpCondSignedLessThan represents Signed "<".
IntegerCmpCondSignedLessThan
// IntegerCmpCondSignedGreaterThanOrEqual represents Signed ">=".
IntegerCmpCondSignedGreaterThanOrEqual
// IntegerCmpCondSignedGreaterThan represents Signed ">".
IntegerCmpCondSignedGreaterThan
// IntegerCmpCondSignedLessThanOrEqual represents Signed "<=".
IntegerCmpCondSignedLessThanOrEqual
// IntegerCmpCondUnsignedLessThan represents Unsigned "<".
IntegerCmpCondUnsignedLessThan
// IntegerCmpCondUnsignedGreaterThanOrEqual represents Unsigned ">=".
IntegerCmpCondUnsignedGreaterThanOrEqual
// IntegerCmpCondUnsignedGreaterThan represents Unsigned ">".
IntegerCmpCondUnsignedGreaterThan
// IntegerCmpCondUnsignedLessThanOrEqual represents Unsigned "<=".
IntegerCmpCondUnsignedLessThanOrEqual
)
// String implements fmt.Stringer.
func (i IntegerCmpCond) String() string {
switch i {
case IntegerCmpCondEqual:
return "eq"
case IntegerCmpCondNotEqual:
return "neq"
case IntegerCmpCondSignedLessThan:
return "lt_s"
case IntegerCmpCondSignedGreaterThanOrEqual:
return "ge_s"
case IntegerCmpCondSignedGreaterThan:
return "gt_s"
case IntegerCmpCondSignedLessThanOrEqual:
return "le_s"
case IntegerCmpCondUnsignedLessThan:
return "lt_u"
case IntegerCmpCondUnsignedGreaterThanOrEqual:
return "ge_u"
case IntegerCmpCondUnsignedGreaterThan:
return "gt_u"
case IntegerCmpCondUnsignedLessThanOrEqual:
return "le_u"
default:
panic("invalid integer comparison condition")
}
}
// Signed returns true if the condition is a signed integer comparison.
func (i IntegerCmpCond) Signed() bool {
switch i {
case IntegerCmpCondSignedLessThan, IntegerCmpCondSignedGreaterThanOrEqual,
IntegerCmpCondSignedGreaterThan, IntegerCmpCondSignedLessThanOrEqual:
return true
default:
return false
}
}
// FloatCmpCond represents a condition for floating-point comparison.
type FloatCmpCond byte
const (
// FloatCmpCondInvalid represents an invalid condition.
FloatCmpCondInvalid FloatCmpCond = iota
// FloatCmpCondEqual represents "==".
FloatCmpCondEqual
// FloatCmpCondNotEqual represents "!=".
FloatCmpCondNotEqual
// FloatCmpCondLessThan represents "<".
FloatCmpCondLessThan
// FloatCmpCondLessThanOrEqual represents "<=".
FloatCmpCondLessThanOrEqual
// FloatCmpCondGreaterThan represents ">".
FloatCmpCondGreaterThan
// FloatCmpCondGreaterThanOrEqual represents ">=".
FloatCmpCondGreaterThanOrEqual
)
// String implements fmt.Stringer.
func (f FloatCmpCond) String() string {
switch f {
case FloatCmpCondEqual:
return "eq"
case FloatCmpCondNotEqual:
return "neq"
case FloatCmpCondLessThan:
return "lt"
case FloatCmpCondLessThanOrEqual:
return "le"
case FloatCmpCondGreaterThan:
return "gt"
case FloatCmpCondGreaterThanOrEqual:
return "ge"
default:
panic("invalid float comparison condition")
}
}

View File

@ -0,0 +1,12 @@
package ssa
import "fmt"
// FuncRef is a unique identifier for a function of the frontend,
// and is used to reference the function in function calls.
type FuncRef uint32
// String implements fmt.Stringer.
func (r FuncRef) String() string {
return fmt.Sprintf("f%d", r)
}

File diff suppressed because it is too large

View File

@ -0,0 +1,417 @@
package ssa
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
// RunPasses implements Builder.RunPasses.
//
// The order here matters; some passes depend on the previous ones.
//
// Note that passes suffixed with "Opt" are optimization passes, meaning that they edit the instructions and blocks,
// while the other passes do not; e.g. passEstimateBranchProbabilities only calculates additional information without editing them.
func (b *builder) RunPasses() {
b.runPreBlockLayoutPasses()
b.runBlockLayoutPass()
b.runPostBlockLayoutPasses()
b.runFinalizingPasses()
}
func (b *builder) runPreBlockLayoutPasses() {
passSortSuccessors(b)
passDeadBlockEliminationOpt(b)
passRedundantPhiEliminationOpt(b)
// The result of passCalculateImmediateDominators will be used by various passes below.
passCalculateImmediateDominators(b)
passNopInstElimination(b)
// TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic.
	// A WebAssembly program shouldn't result in an irreducible CFG, but we should handle it properly just in case.
// See FixIrreducible pass in LLVM: https://llvm.org/doxygen/FixIrreducible_8cpp_source.html
// TODO: implement more optimization passes like:
// block coalescing.
// Copy-propagation.
// Constant folding.
// Common subexpression elimination.
// Arithmetic simplifications.
// and more!
// passDeadCodeEliminationOpt could be more accurate if we do this after other optimizations.
passDeadCodeEliminationOpt(b)
b.donePreBlockLayoutPasses = true
}
func (b *builder) runBlockLayoutPass() {
if !b.donePreBlockLayoutPasses {
panic("runBlockLayoutPass must be called after all pre passes are done")
}
passLayoutBlocks(b)
b.doneBlockLayout = true
}
// runPostBlockLayoutPasses runs the post block layout passes. After this point, the CFG is somewhat stable,
// but can still be modified before the finalizing passes. At this point, critical edges are split by passLayoutBlocks.
func (b *builder) runPostBlockLayoutPasses() {
if !b.doneBlockLayout {
panic("runPostBlockLayoutPasses must be called after block layout pass is done")
}
// TODO: Do more. e.g. tail duplication, loop unrolling, etc.
b.donePostBlockLayoutPasses = true
}
// runFinalizingPasses runs the finalizing passes. After this point, CFG should not be modified.
func (b *builder) runFinalizingPasses() {
if !b.donePostBlockLayoutPasses {
panic("runFinalizingPasses must be called after post block layout passes are done")
}
// Critical edges are split, so we fix the loop nesting forest.
passBuildLoopNestingForest(b)
passBuildDominatorTree(b)
// Now that we know the final placement of the blocks, we can explicitly mark the fallthrough jumps.
b.markFallthroughJumps()
}
// passDeadBlockEliminationOpt searches for unreachable blocks and sets the basicBlock.invalid flag to true on each one found.
func passDeadBlockEliminationOpt(b *builder) {
entryBlk := b.entryBlk()
b.clearBlkVisited()
b.blkStack = append(b.blkStack, entryBlk)
for len(b.blkStack) > 0 {
reachableBlk := b.blkStack[len(b.blkStack)-1]
b.blkStack = b.blkStack[:len(b.blkStack)-1]
b.blkVisited[reachableBlk] = 0 // the value won't be used in this pass.
if !reachableBlk.sealed && !reachableBlk.ReturnBlock() {
panic(fmt.Sprintf("%s is not sealed", reachableBlk))
}
if wazevoapi.SSAValidationEnabled {
reachableBlk.validate(b)
}
for _, succ := range reachableBlk.success {
if _, ok := b.blkVisited[succ]; ok {
continue
}
b.blkStack = append(b.blkStack, succ)
}
}
for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
if _, ok := b.blkVisited[blk]; !ok {
blk.invalid = true
}
}
}
// passRedundantPhiEliminationOpt eliminates the redundant PHIs (in our terminology, parameters of a block).
func passRedundantPhiEliminationOpt(b *builder) {
redundantParameterIndexes := b.ints[:0] // reuse the slice from previous iterations.
	// TODO: this might be costly for large programs, but at least in the experiments so far, it's almost the
	// same as the single-iteration version in terms of overall compilation time. That *might be* mostly because
	// removing many PHIs reduces the total number of instructions, not because the number of iterations is
	// relatively small. For example, the sqlite speedtest binary results in a large number of redundant PHIs,
	// and the maximum number of iterations was 22, which seems acceptable but not that small either, since the
	// worst-case complexity here is O(BlockNum * Iterations) where BlockNum might be in the order of thousands.
for {
changed := false
_ = b.blockIteratorBegin() // skip entry block!
		// Below, we intentionally use named iteration variables, as this comes with inevitably nested for loops!
for blk := b.blockIteratorNext(); blk != nil; blk = b.blockIteratorNext() {
paramNum := len(blk.params)
for paramIndex := 0; paramIndex < paramNum; paramIndex++ {
phiValue := blk.params[paramIndex].value
redundant := true
nonSelfReferencingValue := ValueInvalid
for predIndex := range blk.preds {
br := blk.preds[predIndex].branch
// Resolve the alias in the arguments so that we could use the previous iteration's result.
b.resolveArgumentAlias(br)
pred := br.vs.View()[paramIndex]
if pred == phiValue {
// This is self-referencing: PHI from the same PHI.
continue
}
if !nonSelfReferencingValue.Valid() {
nonSelfReferencingValue = pred
continue
}
if nonSelfReferencingValue != pred {
redundant = false
break
}
}
if !nonSelfReferencingValue.Valid() {
// This shouldn't happen, and must be a bug in builder.go.
panic("BUG: params added but only self-referencing")
}
if redundant {
b.redundantParameterIndexToValue[paramIndex] = nonSelfReferencingValue
redundantParameterIndexes = append(redundantParameterIndexes, paramIndex)
}
}
if len(b.redundantParameterIndexToValue) == 0 {
continue
}
changed = true
// Remove the redundant PHIs from the argument list of branching instructions.
for predIndex := range blk.preds {
var cur int
predBlk := blk.preds[predIndex]
branchInst := predBlk.branch
view := branchInst.vs.View()
for argIndex, value := range view {
if _, ok := b.redundantParameterIndexToValue[argIndex]; !ok {
view[cur] = value
cur++
}
}
branchInst.vs.Cut(cur)
}
			// We still need the definition of the PHI value (previously the block parameter).
for _, redundantParamIndex := range redundantParameterIndexes {
phiValue := blk.params[redundantParamIndex].value
onlyValue := b.redundantParameterIndexToValue[redundantParamIndex]
// Create an alias in this block from the only phi argument to the phi value.
b.alias(phiValue, onlyValue)
}
			// Finally, remove the param from the blk.
var cur int
for paramIndex := 0; paramIndex < paramNum; paramIndex++ {
param := blk.params[paramIndex]
if _, ok := b.redundantParameterIndexToValue[paramIndex]; !ok {
blk.params[cur] = param
cur++
}
}
blk.params = blk.params[:cur]
// Clears the map for the next iteration.
for _, paramIndex := range redundantParameterIndexes {
delete(b.redundantParameterIndexToValue, paramIndex)
}
redundantParameterIndexes = redundantParameterIndexes[:0]
}
if !changed {
break
}
}
// Reuse the slice for the future passes.
b.ints = redundantParameterIndexes
}
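// As a small worked example of this pass (shown schematically, not in the exact textual format):
// a non-entry block blk1 with a parameter that receives the same value v3 from every predecessor,
//
//	blk0: ... Jump blk1 (v3)
//	blk2: ... Jump blk1 (v3)
//	blk1: (v5:i32) ...
//
// is rewritten so that the parameter is removed from blk1 and from both branch argument lists,
// and v5 becomes an alias of v3, to be resolved later via resolveArgumentAlias. Self-referencing
// arguments (v5 passed back to blk1 from inside a loop) do not prevent this elimination.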
// passDeadCodeEliminationOpt traverses all the instructions, calculates the reference count of each Value, and
// eliminates all the unnecessary instructions whose ref count is zero.
// The results are stored at builder.valueRefCounts. This also assigns an InstructionGroupID to each Instruction
// during the process. This is the last SSA-level optimization pass and after this,
// the SSA function is ready to be used by backends.
//
// TODO: the algorithm here might not be efficient. Get back to this later.
func passDeadCodeEliminationOpt(b *builder) {
nvid := int(b.nextValueID)
if nvid >= len(b.valueRefCounts) {
b.valueRefCounts = append(b.valueRefCounts, make([]int, b.nextValueID)...)
}
if nvid >= len(b.valueIDToInstruction) {
b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...)
}
// First, we gather all the instructions with side effects.
liveInstructions := b.instStack[:0]
	// During the process, we will assign an InstructionGroupID to each instruction, which is not
	// relevant to dead code elimination, but is needed in the backend.
var gid InstructionGroupID
for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
for cur := blk.rootInstr; cur != nil; cur = cur.next {
cur.gid = gid
switch cur.sideEffect() {
case sideEffectTraps:
				// Trappable instructions should always be kept alive.
liveInstructions = append(liveInstructions, cur)
case sideEffectStrict:
liveInstructions = append(liveInstructions, cur)
// The strict side effect should create different instruction groups.
gid++
}
r1, rs := cur.Returns()
if r1.Valid() {
b.valueIDToInstruction[r1.ID()] = cur
}
for _, r := range rs {
b.valueIDToInstruction[r.ID()] = cur
}
}
}
// Find all the instructions referenced by live instructions transitively.
for len(liveInstructions) > 0 {
tail := len(liveInstructions) - 1
live := liveInstructions[tail]
liveInstructions = liveInstructions[:tail]
if live.live {
// If it's already marked alive, this is referenced multiple times,
// so we can skip it.
continue
}
live.live = true
// Before we walk, we need to resolve the alias first.
b.resolveArgumentAlias(live)
v1, v2, v3, vs := live.Args()
if v1.Valid() {
producingInst := b.valueIDToInstruction[v1.ID()]
if producingInst != nil {
liveInstructions = append(liveInstructions, producingInst)
}
}
if v2.Valid() {
producingInst := b.valueIDToInstruction[v2.ID()]
if producingInst != nil {
liveInstructions = append(liveInstructions, producingInst)
}
}
if v3.Valid() {
producingInst := b.valueIDToInstruction[v3.ID()]
if producingInst != nil {
liveInstructions = append(liveInstructions, producingInst)
}
}
for _, v := range vs {
producingInst := b.valueIDToInstruction[v.ID()]
if producingInst != nil {
liveInstructions = append(liveInstructions, producingInst)
}
}
}
// Now that all the live instructions are flagged as live=true, we eliminate all dead instructions.
for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
for cur := blk.rootInstr; cur != nil; cur = cur.next {
if !cur.live {
// Remove the instruction from the list.
if prev := cur.prev; prev != nil {
prev.next = cur.next
} else {
blk.rootInstr = cur.next
}
if next := cur.next; next != nil {
next.prev = cur.prev
}
continue
}
			// If the value is alive, we can be sure that its arguments are definitely used.
			// Hence, we increment the value reference counts.
v1, v2, v3, vs := cur.Args()
if v1.Valid() {
b.incRefCount(v1.ID(), cur)
}
if v2.Valid() {
b.incRefCount(v2.ID(), cur)
}
if v3.Valid() {
b.incRefCount(v3.ID(), cur)
}
for _, v := range vs {
b.incRefCount(v.ID(), cur)
}
}
}
b.instStack = liveInstructions // we reuse the stack for the next iteration.
}
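// For intuition, a schematic sketch of what this pass does to a tiny block:
//
//	v1:i32 = Iconst ...   ; no side effect, never referenced
//	v2:i32 = Iconst ...
//	Return v2             ; has a side effect, so it seeds liveInstructions
//
// The Return is live, which transitively marks the definition of v2 as live and bumps
// valueRefCounts[v2] to 1, while the definition of v1 stays !live and is unlinked from
// the instruction list above.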
func (b *builder) incRefCount(id ValueID, from *Instruction) {
if wazevoapi.SSALoggingEnabled {
fmt.Printf("v%d referenced from %v\n", id, from.Format(b))
}
b.valueRefCounts[id]++
}
// clearBlkVisited clears the b.blkVisited map so that we can reuse it in multiple places.
func (b *builder) clearBlkVisited() {
b.blkStack2 = b.blkStack2[:0]
for key := range b.blkVisited {
b.blkStack2 = append(b.blkStack2, key)
}
for _, blk := range b.blkStack2 {
delete(b.blkVisited, blk)
}
b.blkStack2 = b.blkStack2[:0]
}
// passNopInstElimination eliminates instructions which are essentially no-ops.
func passNopInstElimination(b *builder) {
if int(b.nextValueID) >= len(b.valueIDToInstruction) {
b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...)
}
for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
for cur := blk.rootInstr; cur != nil; cur = cur.next {
r1, rs := cur.Returns()
if r1.Valid() {
b.valueIDToInstruction[r1.ID()] = cur
}
for _, r := range rs {
b.valueIDToInstruction[r.ID()] = cur
}
}
}
for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
for cur := blk.rootInstr; cur != nil; cur = cur.next {
switch cur.Opcode() {
			// TODO: add more logic here.
case OpcodeIshl, OpcodeSshr, OpcodeUshr:
x, amount := cur.Arg2()
definingInst := b.valueIDToInstruction[amount.ID()]
if definingInst == nil {
// If there's no defining instruction, that means the amount is coming from the parameter.
continue
}
if definingInst.Constant() {
v := definingInst.ConstantVal()
if x.Type().Bits() == 64 {
v = v % 64
} else {
v = v % 32
}
if v == 0 {
b.alias(cur.Return(), x)
}
}
}
}
}
}
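// A concrete instance of the shift case above (schematic):
//
//	v2:i64 = Iconst 64     ; constant shift amount equal to the bit width of v1
//	v3:i64 = Ishl v1, v2   ; 64 % 64 == 0, so the shift is a no-op
//
// After this pass, v3 is aliased to v1; the Ishl (and, once unreferenced, the constant)
// can then be removed by the subsequent passDeadCodeEliminationOpt.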
// passSortSuccessors sorts the successors of each block in the natural program order.
func passSortSuccessors(b *builder) {
for i := 0; i < b.basicBlocksPool.Allocated(); i++ {
blk := b.basicBlocksPool.View(i)
sortBlocks(blk.success)
}
}

View File

@ -0,0 +1,335 @@
package ssa
import (
"fmt"
"strings"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
// passLayoutBlocks implements Builder.LayoutBlocks. This re-organizes builder.reversePostOrderedBasicBlocks.
//
// TODO: there is plenty of room for improvement here. e.g. LLVM has BlockPlacementPass using BlockFrequencyInfo,
// BranchProbabilityInfo, and LoopInfo to do a much better job. Also, if we had profiling instrumentation
// like the Ball-Larus algorithm, then we could do profile-guided optimization. Basically, all of them try to
// maximize fall-through opportunities, which is most efficient.
//
// Here, a fallthrough happens when a block ends with a jump instruction whose target is the next block in
// builder.reversePostOrderedBasicBlocks.
//
// Currently, we just place blocks using the DFS reverse post-order of the dominator tree with these heuristics:
// 1. a split-edge trampoline towards a loop header will be placed as a fallthrough.
// 2. we invert the brz and brnz if it makes the fallthrough more likely.
//
// These heuristics are applied in the maybeInvertBranches function.
func passLayoutBlocks(b *builder) {
b.clearBlkVisited()
// We might end up splitting critical edges which adds more basic blocks,
// so we store the currently existing basic blocks in nonSplitBlocks temporarily.
// That way we can iterate over the original basic blocks while appending new ones into reversePostOrderedBasicBlocks.
nonSplitBlocks := b.blkStack[:0]
for i, blk := range b.reversePostOrderedBasicBlocks {
if !blk.Valid() {
continue
}
nonSplitBlocks = append(nonSplitBlocks, blk)
if i != len(b.reversePostOrderedBasicBlocks)-1 {
_ = maybeInvertBranches(blk, b.reversePostOrderedBasicBlocks[i+1])
}
}
var trampolines []*basicBlock
// Reset the order slice since we update on the fly by splitting critical edges.
b.reversePostOrderedBasicBlocks = b.reversePostOrderedBasicBlocks[:0]
uninsertedTrampolines := b.blkStack2[:0]
for _, blk := range nonSplitBlocks {
for i := range blk.preds {
pred := blk.preds[i].blk
if _, ok := b.blkVisited[pred]; ok || !pred.Valid() {
continue
} else if pred.reversePostOrder < blk.reversePostOrder {
// This means the edge is critical, and this pred is the trampoline and yet to be inserted.
// Split edge trampolines must come before the destination in reverse post-order.
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, pred)
b.blkVisited[pred] = 0 // mark as inserted, the value is not used.
}
}
// Now that we've already added all the potential trampoline blocks incoming to this block,
// we can add this block itself.
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, blk)
b.blkVisited[blk] = 0 // mark as inserted, the value is not used.
if len(blk.success) < 2 {
// There won't be critical edge originating from this block.
continue
} else if blk.currentInstr.opcode == OpcodeBrTable {
// We don't split critical edges here, because at the construction site of BrTable, we already split the edges.
continue
}
for sidx, succ := range blk.success {
			if !succ.ReturnBlock() && // If the successor is a return block, we need to split the edge anyway because we need an "epilogue" to be inserted.
// Plus if there's no multiple incoming edges to this successor, (pred, succ) is not critical.
len(succ.preds) < 2 {
continue
}
// Otherwise, we are sure this is a critical edge. To modify the CFG, we need to find the predecessor info
// from the successor.
var predInfo *basicBlockPredecessorInfo
			for i := range succ.preds { // This linear search should not be a problem since the number of predecessors should almost always be small.
pred := &succ.preds[i]
if pred.blk == blk {
predInfo = pred
break
}
}
if predInfo == nil {
// This must be a bug in somewhere around branch manipulation.
panic("BUG: predecessor info not found while the successor exists in successors list")
}
if wazevoapi.SSALoggingEnabled {
fmt.Printf("trying to split edge from %d->%d at %s\n",
blk.ID(), succ.ID(), predInfo.branch.Format(b))
}
trampoline := b.splitCriticalEdge(blk, succ, predInfo)
// Update the successors slice because the target is no longer the original `succ`.
blk.success[sidx] = trampoline
if wazevoapi.SSAValidationEnabled {
trampolines = append(trampolines, trampoline)
}
if wazevoapi.SSALoggingEnabled {
fmt.Printf("edge split from %d->%d at %s as %d->%d->%d \n",
blk.ID(), succ.ID(), predInfo.branch.Format(b),
blk.ID(), trampoline.ID(), succ.ID())
}
fallthroughBranch := blk.currentInstr
if fallthroughBranch.opcode == OpcodeJump && fallthroughBranch.blk == trampoline {
// This can be lowered as fallthrough at the end of the block.
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline)
b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used.
} else {
uninsertedTrampolines = append(uninsertedTrampolines, trampoline)
}
}
for _, trampoline := range uninsertedTrampolines {
if trampoline.success[0].reversePostOrder <= trampoline.reversePostOrder { // "<=", not "<" because the target might be itself.
// This means the critical edge was backward, so we insert after the current block immediately.
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline)
b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used.
} // If the target is forward, we can wait to insert until the target is inserted.
}
uninsertedTrampolines = uninsertedTrampolines[:0] // Reuse the stack for the next block.
}
if wazevoapi.SSALoggingEnabled {
var bs []string
for _, blk := range b.reversePostOrderedBasicBlocks {
bs = append(bs, blk.Name())
}
fmt.Println("ordered blocks: ", strings.Join(bs, ", "))
}
if wazevoapi.SSAValidationEnabled {
for _, trampoline := range trampolines {
if _, ok := b.blkVisited[trampoline]; !ok {
panic("BUG: trampoline block not inserted: " + trampoline.FormatHeader(b))
}
trampoline.validate(b)
}
}
// Reuse the stack for the next iteration.
b.blkStack2 = uninsertedTrampolines[:0]
}
// markFallthroughJumps finds the fallthrough jumps and marks them as such.
func (b *builder) markFallthroughJumps() {
l := len(b.reversePostOrderedBasicBlocks) - 1
for i, blk := range b.reversePostOrderedBasicBlocks {
if i < l {
cur := blk.currentInstr
if cur.opcode == OpcodeJump && cur.blk == b.reversePostOrderedBasicBlocks[i+1] {
cur.AsFallthroughJump()
}
}
}
}
// maybeInvertBranches inverts the branch instructions if doing so is likely to make the fallthrough more likely, based on simple heuristics.
// nextInRPO is the next block in the reverse post-order.
//
// Returns true if the branches are inverted (used for testing purposes).
func maybeInvertBranches(now *basicBlock, nextInRPO *basicBlock) bool {
fallthroughBranch := now.currentInstr
if fallthroughBranch.opcode == OpcodeBrTable {
return false
}
condBranch := fallthroughBranch.prev
if condBranch == nil || (condBranch.opcode != OpcodeBrnz && condBranch.opcode != OpcodeBrz) {
return false
}
if len(fallthroughBranch.vs.View()) != 0 || len(condBranch.vs.View()) != 0 {
// If either one of them has arguments, we don't invert the branches.
return false
}
// So this block has two branches (a conditional branch followed by an unconditional branch) at the end.
// We can invert the condition of the branch if it makes the fallthrough more likely.
fallthroughTarget, condTarget := fallthroughBranch.blk.(*basicBlock), condBranch.blk.(*basicBlock)
if fallthroughTarget.loopHeader {
		// First, if the tail's target is a loop header, we don't need to do anything here,
		// because the edge is likely to be a critical edge for complex loops (e.g. a loop with branches inside it).
		// That means we will split the edge at the end of the LayoutBlocks function and insert the trampoline block
		// right after this block, which will be a fallthrough in any case.
return false
} else if condTarget.loopHeader {
// On the other hand, if the condBranch's target is loopHeader, we invert the condition of the branch
// so that we could get the fallthrough to the trampoline block.
goto invert
}
if fallthroughTarget == nextInRPO {
// Also, if the tail's target is the next block in the reverse post-order, we don't need to do anything here,
		// because if this is not a critical edge, we would end up placing these two blocks adjacent to each other.
		// Even if it is a critical edge, we place the trampoline block right after this block, which will be a fallthrough in any case.
return false
} else if condTarget == nextInRPO {
// If the condBranch's target is the next block in the reverse post-order, we invert the condition of the branch
// so that we could get the fallthrough to the block.
goto invert
} else {
return false
}
invert:
for i := range fallthroughTarget.preds {
pred := &fallthroughTarget.preds[i]
if pred.branch == fallthroughBranch {
pred.branch = condBranch
break
}
}
for i := range condTarget.preds {
pred := &condTarget.preds[i]
if pred.branch == condBranch {
pred.branch = fallthroughBranch
break
}
}
condBranch.InvertBrx()
condBranch.blk = fallthroughTarget
fallthroughBranch.blk = condTarget
if wazevoapi.SSALoggingEnabled {
fmt.Printf("inverting branches at %d->%d and %d->%d\n",
now.ID(), fallthroughTarget.ID(), now.ID(), condTarget.ID())
}
return true
}
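// A minimal sketch of the inversion above (schematic, argument-less branches only):
//
//	blk0:                         blk0:
//	  Brz v1, blk2        =>        Brnz v1, blk3
//	  Jump blk3                     Jump blk2
//
// where blk2 is the next block in reverse post-order (or a loop header), so after the swap the
// unconditional Jump targets it and can later be marked as a fallthrough by markFallthroughJumps.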
// splitCriticalEdge splits the critical edge between the given predecessor (`pred`) and successor (owning `predInfo`).
//
// - `pred` is the source of the critical edge,
// - `succ` is the destination of the critical edge,
// - `predInfo` is the predecessor info in the succ.preds slice which represents the critical edge.
//
// Why is splitting critical edges important? See the following links:
//
// - https://en.wikipedia.org/wiki/Control-flow_graph
// - https://nickdesaulniers.github.io/blog/2023/01/27/critical-edge-splitting/
//
// The returned basic block is the trampoline block which is inserted to split the critical edge.
func (b *builder) splitCriticalEdge(pred, succ *basicBlock, predInfo *basicBlockPredecessorInfo) *basicBlock {
// In the following, we convert the following CFG:
//
// pred --(originalBranch)--> succ
//
// to the following CFG:
//
// pred --(newBranch)--> trampoline --(originalBranch)-> succ
//
// where trampoline is a new basic block which is created to split the critical edge.
trampoline := b.allocateBasicBlock()
if int(trampoline.id) >= len(b.dominators) {
b.dominators = append(b.dominators, make([]*basicBlock, trampoline.id+1)...)
}
b.dominators[trampoline.id] = pred
originalBranch := predInfo.branch
// Replace originalBranch with the newBranch.
newBranch := b.AllocateInstruction()
newBranch.opcode = originalBranch.opcode
newBranch.blk = trampoline
switch originalBranch.opcode {
case OpcodeJump:
case OpcodeBrz, OpcodeBrnz:
originalBranch.opcode = OpcodeJump // Trampoline consists of one unconditional branch.
newBranch.v = originalBranch.v
originalBranch.v = ValueInvalid
default:
panic("BUG: critical edge shouldn't be originated from br_table")
}
swapInstruction(pred, originalBranch, newBranch)
	// The trampoline consists of the original branch only.
trampoline.rootInstr = originalBranch
trampoline.currentInstr = originalBranch
	trampoline.success = append(trampoline.success, succ) // Do not use []*basicBlock{succ} because we might have already allocated the slice.
trampoline.preds = append(trampoline.preds, // same as ^.
basicBlockPredecessorInfo{blk: pred, branch: newBranch})
b.Seal(trampoline)
	// Update the predecessor info in succ so that it points to the trampoline.
predInfo.blk = trampoline
predInfo.branch = originalBranch
if wazevoapi.SSAValidationEnabled {
trampoline.validate(b)
}
if len(trampoline.params) > 0 {
panic("trampoline should not have params")
}
// Assign the same order as the original block so that this will be placed before the actual destination.
trampoline.reversePostOrder = pred.reversePostOrder
return trampoline
}
// swapInstruction replaces `old` in the block `blk` with `New`.
func swapInstruction(blk *basicBlock, old, New *Instruction) {
if blk.rootInstr == old {
blk.rootInstr = New
next := old.next
New.next = next
next.prev = New
} else {
if blk.currentInstr == old {
blk.currentInstr = New
}
prev := old.prev
prev.next, New.prev = New, prev
if next := old.next; next != nil {
New.next, next.prev = next, New
}
}
old.prev, old.next = nil, nil
}

View File

@ -0,0 +1,312 @@
package ssa
import (
"fmt"
"math"
"strings"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
// passCalculateImmediateDominators calculates immediate dominators for each basic block.
// The result is stored in b.dominators. This makes it possible for the following passes to
// use builder.isDominatedBy to check if a block is dominated by another block.
//
// At the end of the pass, this function also performs loop detection and sets the basicBlock.loopHeader flag.
func passCalculateImmediateDominators(b *builder) {
reversePostOrder := b.reversePostOrderedBasicBlocks[:0]
exploreStack := b.blkStack[:0]
b.clearBlkVisited()
entryBlk := b.entryBlk()
// Store the reverse postorder from the entrypoint into reversePostOrder slice.
	// This calculation of reverse postorder is not described in the paper,
	// so we use a heuristic to calculate it so that we can potentially handle arbitrarily
	// complex CFGs, under the assumption that success is sorted in the program's natural order.
	// That means blk.success[i] always appears before blk.success[i+1] in the source program,
	// which is a reasonable assumption as long as the SSA Builder is properly used.
	//
	// First we push blocks in postorder by iteratively visiting the successors of the entry block.
exploreStack = append(exploreStack, entryBlk)
const visitStateUnseen, visitStateSeen, visitStateDone = 0, 1, 2
b.blkVisited[entryBlk] = visitStateSeen
for len(exploreStack) > 0 {
tail := len(exploreStack) - 1
blk := exploreStack[tail]
exploreStack = exploreStack[:tail]
switch b.blkVisited[blk] {
case visitStateUnseen:
// This is likely a bug in the frontend.
panic("BUG: unsupported CFG")
case visitStateSeen:
			// This is the first time we pop this block, and we have to visit its successors first.
			// So push this block back onto the stack.
exploreStack = append(exploreStack, blk)
// And push the successors to the stack if necessary.
for _, succ := range blk.success {
if succ.ReturnBlock() || succ.invalid {
continue
}
if b.blkVisited[succ] == visitStateUnseen {
b.blkVisited[succ] = visitStateSeen
exploreStack = append(exploreStack, succ)
}
}
			// Finally, we can pop this block once we have popped all of its successors.
b.blkVisited[blk] = visitStateDone
case visitStateDone:
			// Note: at this point we append blk in postorder, despite the slice's name (it is reversed below).
reversePostOrder = append(reversePostOrder, blk)
}
}
	// At this point, reversePostOrder actually holds the postorder, so we reverse it.
for i := len(reversePostOrder)/2 - 1; i >= 0; i-- {
j := len(reversePostOrder) - 1 - i
reversePostOrder[i], reversePostOrder[j] = reversePostOrder[j], reversePostOrder[i]
}
for i, blk := range reversePostOrder {
blk.reversePostOrder = i
}
	// Reuse the dominators slice from the previous function's computation if possible.
b.dominators = b.dominators[:cap(b.dominators)]
if len(b.dominators) < b.basicBlocksPool.Allocated() {
		// Generously reserve space in the slice because it will be reused for future compilations.
b.dominators = append(b.dominators, make([]*basicBlock, b.basicBlocksPool.Allocated())...)
}
calculateDominators(reversePostOrder, b.dominators)
	// Reuse the slices for future use.
b.blkStack = exploreStack
// For the following passes.
b.reversePostOrderedBasicBlocks = reversePostOrder
// Ready to detect loops!
subPassLoopDetection(b)
}
// calculateDominators calculates the immediate dominator of each node in the CFG, and stores the result in `doms`.
// The algorithm is based on the one described in the paper "A Simple, Fast Dominance Algorithm"
// https://www.cs.rice.edu/~keith/EMBED/dom.pdf which is a simpler and faster alternative to the well-known Lengauer-Tarjan algorithm.
//
// The following code almost matches the pseudocode in the paper with one exception (see the code comment below).
//
// The result slice `doms` must be pre-allocated with a size larger than the size of reversePostOrderedBlks.
func calculateDominators(reversePostOrderedBlks []*basicBlock, doms []*basicBlock) {
entry, reversePostOrderedBlks := reversePostOrderedBlks[0], reversePostOrderedBlks[1: /* skips entry point */]
for _, blk := range reversePostOrderedBlks {
doms[blk.id] = nil
}
doms[entry.id] = entry
changed := true
for changed {
changed = false
for _, blk := range reversePostOrderedBlks {
var u *basicBlock
for i := range blk.preds {
pred := blk.preds[i].blk
// Skip if this pred is not reachable yet. Note that this is not described in the paper,
// but it is necessary to handle nested loops etc.
if doms[pred.id] == nil {
continue
}
if u == nil {
u = pred
continue
} else {
u = intersect(doms, u, pred)
}
}
if doms[blk.id] != u {
doms[blk.id] = u
changed = true
}
}
}
}
// intersect returns the common dominator of blk1 and blk2.
//
// This is the `intersect` function in the paper.
func intersect(doms []*basicBlock, blk1 *basicBlock, blk2 *basicBlock) *basicBlock {
finger1, finger2 := blk1, blk2
for finger1 != finger2 {
// Move the 'finger1' upwards to its immediate dominator.
for finger1.reversePostOrder > finger2.reversePostOrder {
finger1 = doms[finger1.id]
}
// Move the 'finger2' upwards to its immediate dominator.
for finger2.reversePostOrder > finger1.reversePostOrder {
finger2 = doms[finger2.id]
}
}
return finger1
}
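// A tiny worked example for calculateDominators/intersect on a diamond CFG:
//
//	blk0 -> blk1, blk0 -> blk2, blk1 -> blk3, blk2 -> blk3
//
// With the reverse post-order blk0, blk1, blk2, blk3, the fixed point gives
// doms[blk1] = doms[blk2] = blk0. For blk3, intersect(doms, blk1, blk2) walks both fingers up
// to blk0, so doms[blk3] = blk0 as well: the entry immediately dominates every other node.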
// subPassLoopDetection detects loops in the function using the immediate dominators.
//
// This is run at the end of passCalculateImmediateDominators.
func subPassLoopDetection(b *builder) {
for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
for i := range blk.preds {
pred := blk.preds[i].blk
if pred.invalid {
continue
}
if b.isDominatedBy(pred, blk) {
blk.loopHeader = true
}
}
}
}
// passBuildLoopNestingForest builds the loop nesting forest for the function.
// This must be called after critical edge splitting since it relies on the final CFG.
func passBuildLoopNestingForest(b *builder) {
ent := b.entryBlk()
doms := b.dominators
for _, blk := range b.reversePostOrderedBasicBlocks {
n := doms[blk.id]
for !n.loopHeader && n != ent {
n = doms[n.id]
}
if n == ent && blk.loopHeader {
b.loopNestingForestRoots = append(b.loopNestingForestRoots, blk)
} else if n == ent {
} else if n.loopHeader {
n.loopNestingForestChildren = append(n.loopNestingForestChildren, blk)
}
}
if wazevoapi.SSALoggingEnabled {
for _, root := range b.loopNestingForestRoots {
printLoopNestingForest(root.(*basicBlock), 0)
}
}
}
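// For intuition: with an outer loop headed by blk1 containing an inner loop headed by blk2,
// the walk above stops at the closest dominating loop header. blk1 (whose walk reaches the
// entry) becomes a forest root, the outer-loop blocks dominated by blk1 (including blk2)
// become children of blk1, and the inner-loop blocks dominated by blk2 become children of blk2.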
func printLoopNestingForest(root *basicBlock, depth int) {
fmt.Println(strings.Repeat("\t", depth), "loop nesting forest root:", root.ID())
for _, child := range root.loopNestingForestChildren {
fmt.Println(strings.Repeat("\t", depth+1), "child:", child.ID())
if child.LoopHeader() {
printLoopNestingForest(child.(*basicBlock), depth+2)
}
}
}
type dominatorSparseTree struct {
time int
euler []*basicBlock
first, depth []int
table [][]int
}
// passBuildDominatorTree builds the dominator tree for the function, and constructs builder.sparseTree.
func passBuildDominatorTree(b *builder) {
// First we materialize the children of each node in the dominator tree.
idoms := b.dominators
for _, blk := range b.reversePostOrderedBasicBlocks {
parent := idoms[blk.id]
if parent == nil {
panic("BUG")
} else if parent == blk {
// This is the entry block.
continue
}
if prev := parent.child; prev == nil {
parent.child = blk
} else {
parent.child = blk
blk.sibling = prev
}
}
// Reset the state from the previous computation.
n := b.basicBlocksPool.Allocated()
st := &b.sparseTree
st.euler = append(st.euler[:0], make([]*basicBlock, 2*n-1)...)
st.first = append(st.first[:0], make([]int, n)...)
for i := range st.first {
st.first[i] = -1
}
st.depth = append(st.depth[:0], make([]int, 2*n-1)...)
st.time = 0
// Start building the sparse tree.
st.eulerTour(b.entryBlk(), 0)
st.buildSparseTable()
}
func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int) {
if wazevoapi.SSALoggingEnabled {
fmt.Println(strings.Repeat("\t", height), "euler tour:", node.ID())
}
dt.euler[dt.time] = node
dt.depth[dt.time] = height
if dt.first[node.id] == -1 {
dt.first[node.id] = dt.time
}
dt.time++
for child := node.child; child != nil; child = child.sibling {
dt.eulerTour(child, height+1)
dt.euler[dt.time] = node // add the current node again after visiting a child
dt.depth[dt.time] = height
dt.time++
}
}
// buildSparseTable builds a sparse table for RMQ queries.
func (dt *dominatorSparseTree) buildSparseTable() {
n := len(dt.depth)
k := int(math.Log2(float64(n))) + 1
table := dt.table
if n >= len(table) {
table = append(table, make([][]int, n+1)...)
}
for i := range table {
if len(table[i]) < k {
table[i] = append(table[i], make([]int, k)...)
}
table[i][0] = i
}
for j := 1; 1<<j <= n; j++ {
for i := 0; i+(1<<j)-1 < n; i++ {
if dt.depth[table[i][j-1]] < dt.depth[table[i+(1<<(j-1))][j-1]] {
table[i][j] = table[i][j-1]
} else {
table[i][j] = table[i+(1<<(j-1))][j-1]
}
}
}
dt.table = table
}
// rmq performs a range minimum query on the sparse table.
func (dt *dominatorSparseTree) rmq(l, r int) int {
table := dt.table
depth := dt.depth
j := int(math.Log2(float64(r - l + 1)))
if depth[table[l][j]] <= depth[table[r-(1<<j)+1][j]] {
return table[l][j]
}
return table[r-(1<<j)+1][j]
}
// findLCA finds the LCA using the Euler tour and RMQ.
func (dt *dominatorSparseTree) findLCA(u, v BasicBlockID) *basicBlock {
first := dt.first
if first[u] > first[v] {
u, v = v, u
}
return dt.euler[dt.rmq(first[u], first[v])]
}
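// A small worked example of the Euler-tour + RMQ machinery above: take a dominator tree where
// blk0 has children blk1 and blk2, and blk1 has child blk3. Visiting blk1's subtree first
// (sibling order does not affect the result), the tour is
//
//	euler = [blk0, blk1, blk3, blk1, blk0, blk2, blk0], depth = [0, 1, 2, 1, 0, 1, 0]
//
// and first[] records each block's first appearance. findLCA(blk3, blk2) queries the range
// between first[blk3] = 2 and first[blk2] = 5; the minimum-depth entry in that range is index 4
// (depth 0), i.e. blk0, which is indeed the lowest common ancestor.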

View File

@ -0,0 +1,49 @@
package ssa
import (
"fmt"
"strings"
)
// Signature is a function prototype.
type Signature struct {
	// ID is a unique identifier for this signature, used for lookup.
ID SignatureID
// Params and Results are the types of the parameters and results of the function.
Params, Results []Type
// used is true if this is used by the currently-compiled function.
// Debugging only.
used bool
}
// String implements fmt.Stringer.
func (s *Signature) String() string {
str := strings.Builder{}
str.WriteString(s.ID.String())
str.WriteString(": ")
if len(s.Params) > 0 {
for _, typ := range s.Params {
str.WriteString(typ.String())
}
} else {
str.WriteByte('v')
}
str.WriteByte('_')
if len(s.Results) > 0 {
for _, typ := range s.Results {
str.WriteString(typ.String())
}
} else {
str.WriteByte('v')
}
return str.String()
}
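// For example (using the Type constants of this package), a signature with ID 1, params
// (i32, i64) and a single f32 result renders as:
//
//	(&Signature{ID: 1, Params: []Type{TypeI32, TypeI64}, Results: []Type{TypeF32}}).String()
//	// => "sig1: i32i64_f32"
//
// while a signature with no params and no results renders as "sigN: v_v".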
// SignatureID is a unique identifier for a Signature, used for lookup.
type SignatureID int
// String implements fmt.Stringer.
func (s SignatureID) String() string {
return fmt.Sprintf("sig%d", s)
}

View File

@ -0,0 +1,14 @@
// Package ssa is used to construct SSA functions. By nature this is free of Wasm-specific things
// and ISA details.
//
// We use the "block argument" variant of SSA: https://en.wikipedia.org/wiki/Static_single-assignment_form#Block_arguments
// which is equivalent to the traditional PHI function based one, but more convenient during optimizations.
// However, in this package's source code comments, we might use PHI whenever it seems necessary in order to be aligned with
// the existing literature, e.g. SSA-level optimization algorithms are often described using PHI nodes.
//
// The rationale doc for the choice of "block argument" by LLVM's MLIR is worth a read:
// https://mlir.llvm.org/docs/Rationale/Rationale/#block-arguments-vs-phi-nodes
//
// The algorithm to resolve variable definitions used here is based on the paper
// "Simple and Efficient Construction of Static Single Assignment Form": https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf.
package ssa

View File

@ -0,0 +1,112 @@
package ssa
// Type represents the type of an SSA value.
type Type byte
const (
typeInvalid Type = iota
// TODO: add 8, 16 bit types when it's needed for optimizations.
// TypeI32 represents an integer type with 32 bits.
TypeI32
// TypeI64 represents an integer type with 64 bits.
TypeI64
// TypeF32 represents 32-bit floats in the IEEE 754.
TypeF32
// TypeF64 represents 64-bit floats in the IEEE 754.
TypeF64
// TypeV128 represents 128-bit SIMD vectors.
TypeV128
)
// String implements fmt.Stringer.
func (t Type) String() (ret string) {
switch t {
case typeInvalid:
return "invalid"
case TypeI32:
return "i32"
case TypeI64:
return "i64"
case TypeF32:
return "f32"
case TypeF64:
return "f64"
case TypeV128:
return "v128"
default:
panic(int(t))
}
}
// IsInt returns true if the type is an integer type.
func (t Type) IsInt() bool {
return t == TypeI32 || t == TypeI64
}
// IsFloat returns true if the type is a floating point type.
func (t Type) IsFloat() bool {
return t == TypeF32 || t == TypeF64
}
// Bits returns the number of bits required to represent the type.
func (t Type) Bits() byte {
switch t {
case TypeI32, TypeF32:
return 32
case TypeI64, TypeF64:
return 64
case TypeV128:
return 128
default:
panic(int(t))
}
}
// Size returns the number of bytes required to represent the type.
func (t Type) Size() byte {
return t.Bits() / 8
}
func (t Type) invalid() bool {
return t == typeInvalid
}
// VecLane represents a lane in a SIMD vector.
type VecLane byte
const (
VecLaneInvalid VecLane = 1 + iota
VecLaneI8x16
VecLaneI16x8
VecLaneI32x4
VecLaneI64x2
VecLaneF32x4
VecLaneF64x2
)
// String implements fmt.Stringer.
func (vl VecLane) String() (ret string) {
switch vl {
case VecLaneInvalid:
return "invalid"
case VecLaneI8x16:
return "i8x16"
case VecLaneI16x8:
return "i16x8"
case VecLaneI32x4:
return "i32x4"
case VecLaneI64x2:
return "i64x2"
case VecLaneF32x4:
return "f32x4"
case VecLaneF64x2:
return "f64x2"
default:
panic(int(vl))
}
}

View File

@ -0,0 +1,87 @@
package ssa
import (
"fmt"
"math"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
// Variable is a unique identifier for a source program's variable and will correspond to
// multiple ssa Value(s).
//
// For example, `Local 1` is a Variable in WebAssembly, and Value(s) will be created for it
// whenever it executes `local.set 1`.
//
// Variable is useful to track the SSA Values of a variable in the source program, and
// can be used to find the corresponding latest SSA Value via Builder.FindValue.
type Variable uint32
// String implements fmt.Stringer.
func (v Variable) String() string {
return fmt.Sprintf("var%d", v)
}
// Value represents an SSA value with type information. The relationship with Variable is 1:N (including 0),
// meaning that there might be multiple Variable(s) for a Value.
//
// The higher 32 bits are used to store the Type of this value.
type Value uint64
// ValueID is the lower 32 bits of Value, which is the pure identifier of a Value without type info.
type ValueID uint32
const (
valueIDInvalid ValueID = math.MaxUint32
ValueInvalid Value = Value(valueIDInvalid)
)
// Format creates a debug string for this Value using the data stored in Builder.
func (v Value) Format(b Builder) string {
if annotation, ok := b.(*builder).valueAnnotations[v.ID()]; ok {
return annotation
}
return fmt.Sprintf("v%d", v.ID())
}
func (v Value) formatWithType(b Builder) (ret string) {
if annotation, ok := b.(*builder).valueAnnotations[v.ID()]; ok {
ret = annotation + ":" + v.Type().String()
} else {
ret = fmt.Sprintf("v%d:%s", v.ID(), v.Type())
}
if wazevoapi.SSALoggingEnabled { // This is useful to check live value analysis bugs.
if bd := b.(*builder); bd.donePostBlockLayoutPasses {
id := v.ID()
ret += fmt.Sprintf("(ref=%d)", bd.valueRefCounts[id])
}
}
return ret
}
// Valid returns true if this value is valid.
func (v Value) Valid() bool {
return v.ID() != valueIDInvalid
}
// Type returns the Type of this value.
func (v Value) Type() Type {
return Type(v >> 32)
}
// ID returns the valueID of this value.
func (v Value) ID() ValueID {
return ValueID(v)
}
// setType sets a type to this Value and returns the updated Value.
func (v Value) setType(typ Type) Value {
return v | Value(typ)<<32
}
// Values is a slice of Value. Use this instead of []Value to reuse the underlying memory.
type Values = wazevoapi.VarLength[Value]
// ValuesNil is a nil Values.
var ValuesNil = wazevoapi.NewNilVarLength[Value]()
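// A minimal sketch of the bit packing described above:
//
//	v := Value(5).setType(TypeI64) // lower 32 bits: ID 5, upper 32 bits: TypeI64
//	_ = v.ID()                     // ValueID(5)
//	_ = v.Type()                   // TypeI64
//	_ = v.Valid()                  // true, since the ID is not valueIDInvalid
//
// ValueInvalid is simply the Value whose ID is math.MaxUint32 with no type bits set.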

View File

@ -0,0 +1,196 @@
package wazevoapi
import (
"context"
"encoding/hex"
"fmt"
"math/rand"
"os"
"time"
)
// These consts are used in various places in the wazevo implementation.
// Instead of defining them in each file, we define them here so that we can quickly iterate on
// debugging without spending time on "where do we have debug logging?".
// ----- Debug logging -----
// These consts must be disabled by default. Enable them only when debugging.
const (
FrontEndLoggingEnabled = false
SSALoggingEnabled = false
RegAllocLoggingEnabled = false
)
// ----- Output prints -----
// These consts must be disabled by default. Enable them only when debugging.
const (
PrintSSA = false
PrintOptimizedSSA = false
PrintSSAToBackendIRLowering = false
PrintRegisterAllocated = false
PrintFinalizedMachineCode = false
PrintMachineCodeHexPerFunction = printMachineCodeHexPerFunctionUnmodified || PrintMachineCodeHexPerFunctionDisassemblable //nolint
printMachineCodeHexPerFunctionUnmodified = false
// PrintMachineCodeHexPerFunctionDisassemblable prints the machine code while modifying the actual result
// to make it disassemblable. This is useful when debugging the final machine code. See the places where this is used for detail.
// When this is enabled, functions must not be called.
PrintMachineCodeHexPerFunctionDisassemblable = false
)
// printTarget is the function index to print the machine code. This is used for debugging to print the machine code
// of a specific function.
const printTarget = -1
// PrintEnabledIndex returns true if the current function index is the print target.
func PrintEnabledIndex(ctx context.Context) bool {
if printTarget == -1 {
return true
}
return GetCurrentFunctionIndex(ctx) == printTarget
}
// ----- Validations -----
const (
// SSAValidationEnabled enables the SSA validation. This is disabled by default since the operation is expensive.
SSAValidationEnabled = false
)
// ----- Stack Guard Check -----
const (
// StackGuardCheckEnabled enables the stack guard check to ensure that our stack bounds check works correctly.
StackGuardCheckEnabled = false
StackGuardCheckGuardPageSize = 8096
)
// CheckStackGuardPage checks that the given stack's guard page is not corrupted.
func CheckStackGuardPage(s []byte) {
for i := 0; i < StackGuardCheckGuardPageSize; i++ {
if s[i] != 0 {
panic(
fmt.Sprintf("BUG: stack guard page is corrupted:\n\tguard_page=%s\n\tstack=%s",
hex.EncodeToString(s[:StackGuardCheckGuardPageSize]),
hex.EncodeToString(s[StackGuardCheckGuardPageSize:]),
))
}
}
}
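// exampleStackGuardCheck is an illustrative sketch (not part of the original file): the first
// StackGuardCheckGuardPageSize bytes of the stack buffer are expected to stay zero, and any
// write into that region is reported as corruption.
func exampleStackGuardCheck() {
stack := make([]byte, StackGuardCheckGuardPageSize+1024)
CheckStackGuardPage(stack) // passes: the guard region is all zeros
stack[0] = 0xAA // simulate a write past the stack bounds
// CheckStackGuardPage(stack) would now panic with a "stack guard page is corrupted" message.
}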
// ----- Deterministic compilation verifier -----
const (
// DeterministicCompilationVerifierEnabled enables the deterministic compilation verifier. This is disabled by default
// since the operation is expensive. But when in doubt, enable this to make sure the compilation is deterministic.
DeterministicCompilationVerifierEnabled = false
DeterministicCompilationVerifyingIter = 5
)
type (
verifierState struct {
initialCompilationDone bool
maybeRandomizedIndexes []int
r *rand.Rand
values map[string]string
}
verifierStateContextKey struct{}
currentFunctionNameKey struct{}
currentFunctionIndexKey struct{}
)
// NewDeterministicCompilationVerifierContext creates a new context with the deterministic compilation verifier used per wasm.Module.
func NewDeterministicCompilationVerifierContext(ctx context.Context, localFunctions int) context.Context {
maybeRandomizedIndexes := make([]int, localFunctions)
for i := range maybeRandomizedIndexes {
maybeRandomizedIndexes[i] = i
}
r := rand.New(rand.NewSource(time.Now().UnixNano()))
return context.WithValue(ctx, verifierStateContextKey{}, &verifierState{
r: r, maybeRandomizedIndexes: maybeRandomizedIndexes, values: map[string]string{},
})
}
// DeterministicCompilationVerifierRandomizeIndexes randomizes the indexes for the deterministic compilation verifier.
// To get the randomized index, use DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex.
func DeterministicCompilationVerifierRandomizeIndexes(ctx context.Context) {
state := ctx.Value(verifierStateContextKey{}).(*verifierState)
if !state.initialCompilationDone {
// If this is the first attempt, we use the indexes in their original order.
state.initialCompilationDone = true
return
}
r := state.r
r.Shuffle(len(state.maybeRandomizedIndexes), func(i, j int) {
state.maybeRandomizedIndexes[i], state.maybeRandomizedIndexes[j] = state.maybeRandomizedIndexes[j], state.maybeRandomizedIndexes[i]
})
}
// DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex returns the randomized index for the given `index`
// which is assigned by DeterministicCompilationVerifierRandomizeIndexes.
func DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx context.Context, index int) int {
state := ctx.Value(verifierStateContextKey{}).(*verifierState)
ret := state.maybeRandomizedIndexes[index]
return ret
}
// VerifyOrSetDeterministicCompilationContextValue verifies that the `newValue` is the same as the previous value for the given `scope`
// and the current function name. If the previous value doesn't exist, it sets the value to the given `newValue`.
//
// If the verification fails, this prints the diff and exits the process.
func VerifyOrSetDeterministicCompilationContextValue(ctx context.Context, scope string, newValue string) {
fn := ctx.Value(currentFunctionNameKey{}).(string)
key := fn + ": " + scope
verifierCtx := ctx.Value(verifierStateContextKey{}).(*verifierState)
oldValue, ok := verifierCtx.values[key]
if !ok {
verifierCtx.values[key] = newValue
return
}
if oldValue != newValue {
fmt.Printf(
`BUG: Deterministic compilation failed for function%s at scope="%s".
This is mostly due to (but might not be limited to):
* Resetting ssa.Builder, backend.Compiler, frontend.Compiler, etc. doesn't work as expected, and the compilation has been affected by previous iterations.
* Using a map with non-deterministic iteration order.
---------- [old] ----------
%s
---------- [new] ----------
%s
`,
fn, scope, oldValue, newValue,
)
os.Exit(1)
}
}
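// exampleDeterministicCompilationVerifier is an illustrative sketch (not part of the original file)
// of how the verifier above is intended to be driven: compile the same module
// DeterministicCompilationVerifyingIter times in a (possibly) shuffled function order and record a
// textual snapshot per scope, so later iterations can be compared against the first one.
// compileFunction and its returned ssaText are hypothetical stand-ins for the real compilation pipeline.
func exampleDeterministicCompilationVerifier(ctx context.Context, localFunctions int, compileFunction func(ctx context.Context, fnIndex int) (ssaText string)) {
ctx = NewDeterministicCompilationVerifierContext(ctx, localFunctions)
for iter := 0; iter < DeterministicCompilationVerifyingIter; iter++ {
DeterministicCompilationVerifierRandomizeIndexes(ctx)
for i := 0; i < localFunctions; i++ {
fnIndex := DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx, i)
fnCtx := SetCurrentFunctionName(ctx, fnIndex, fmt.Sprintf("f[%d]", fnIndex))
// Exits the process with a diff if the snapshot differs from the first iteration.
VerifyOrSetDeterministicCompilationContextValue(fnCtx, "SSA", compileFunction(fnCtx, fnIndex))
}
}
}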
// nolint
const NeedFunctionNameInContext = PrintSSA ||
PrintOptimizedSSA ||
PrintSSAToBackendIRLowering ||
PrintRegisterAllocated ||
PrintFinalizedMachineCode ||
PrintMachineCodeHexPerFunction ||
DeterministicCompilationVerifierEnabled ||
PerfMapEnabled
// SetCurrentFunctionName sets the current function name to the given `functionName`.
func SetCurrentFunctionName(ctx context.Context, index int, functionName string) context.Context {
ctx = context.WithValue(ctx, currentFunctionNameKey{}, functionName)
ctx = context.WithValue(ctx, currentFunctionIndexKey{}, index)
return ctx
}
// GetCurrentFunctionName returns the current function name.
func GetCurrentFunctionName(ctx context.Context) string {
ret, _ := ctx.Value(currentFunctionNameKey{}).(string)
return ret
}
// GetCurrentFunctionIndex returns the current function index.
func GetCurrentFunctionIndex(ctx context.Context) int {
ret, _ := ctx.Value(currentFunctionIndexKey{}).(int)
return ret
}

View File

@ -0,0 +1,109 @@
package wazevoapi
// ExitCode is the exit code of a function execution.
type ExitCode uint32
const (
ExitCodeOK ExitCode = iota
ExitCodeGrowStack
ExitCodeGrowMemory
ExitCodeUnreachable
ExitCodeMemoryOutOfBounds
// ExitCodeCallGoModuleFunction is an exit code for a call to an api.GoModuleFunction.
ExitCodeCallGoModuleFunction
// ExitCodeCallGoFunction is an exit code for a call to an api.GoFunction.
ExitCodeCallGoFunction
ExitCodeTableOutOfBounds
ExitCodeIndirectCallNullPointer
ExitCodeIndirectCallTypeMismatch
ExitCodeIntegerDivisionByZero
ExitCodeIntegerOverflow
ExitCodeInvalidConversionToInteger
ExitCodeCheckModuleExitCode
ExitCodeCallListenerBefore
ExitCodeCallListenerAfter
ExitCodeCallGoModuleFunctionWithListener
ExitCodeCallGoFunctionWithListener
ExitCodeTableGrow
ExitCodeRefFunc
ExitCodeMemoryWait32
ExitCodeMemoryWait64
ExitCodeMemoryNotify
ExitCodeUnalignedAtomic
exitCodeMax
)
const ExitCodeMask = 0xff
// String implements fmt.Stringer.
func (e ExitCode) String() string {
switch e {
case ExitCodeOK:
return "ok"
case ExitCodeGrowStack:
return "grow_stack"
case ExitCodeCallGoModuleFunction:
return "call_go_module_function"
case ExitCodeCallGoFunction:
return "call_go_function"
case ExitCodeUnreachable:
return "unreachable"
case ExitCodeMemoryOutOfBounds:
return "memory_out_of_bounds"
case ExitCodeUnalignedAtomic:
return "unaligned_atomic"
case ExitCodeTableOutOfBounds:
return "table_out_of_bounds"
case ExitCodeIndirectCallNullPointer:
return "indirect_call_null_pointer"
case ExitCodeIndirectCallTypeMismatch:
return "indirect_call_type_mismatch"
case ExitCodeIntegerDivisionByZero:
return "integer_division_by_zero"
case ExitCodeIntegerOverflow:
return "integer_overflow"
case ExitCodeInvalidConversionToInteger:
return "invalid_conversion_to_integer"
case ExitCodeCheckModuleExitCode:
return "check_module_exit_code"
case ExitCodeCallListenerBefore:
return "call_listener_before"
case ExitCodeCallListenerAfter:
return "call_listener_after"
case ExitCodeCallGoModuleFunctionWithListener:
return "call_go_module_function_with_listener"
case ExitCodeCallGoFunctionWithListener:
return "call_go_function_with_listener"
case ExitCodeGrowMemory:
return "grow_memory"
case ExitCodeTableGrow:
return "table_grow"
case ExitCodeRefFunc:
return "ref_func"
case ExitCodeMemoryWait32:
return "memory_wait32"
case ExitCodeMemoryWait64:
return "memory_wait64"
case ExitCodeMemoryNotify:
return "memory_notify"
}
panic("TODO")
}
func ExitCodeCallGoModuleFunctionWithIndex(index int, withListener bool) ExitCode {
if withListener {
return ExitCodeCallGoModuleFunctionWithListener | ExitCode(index<<8)
}
return ExitCodeCallGoModuleFunction | ExitCode(index<<8)
}
func ExitCodeCallGoFunctionWithIndex(index int, withListener bool) ExitCode {
if withListener {
return ExitCodeCallGoFunctionWithListener | ExitCode(index<<8)
}
return ExitCodeCallGoFunction | ExitCode(index<<8)
}
func GoFunctionIndexFromExitCode(exitCode ExitCode) int {
return int(exitCode >> 8)
}
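// exampleExitCodeEncoding is an illustrative sketch (not part of the original file) of the encoding
// used above: the low 8 bits hold the exit code kind and the remaining bits hold the index of the
// Go function being called.
func exampleExitCodeEncoding() {
ec := ExitCodeCallGoFunctionWithIndex(3, false)
_ = ec & ExitCodeMask // == ExitCodeCallGoFunction
_ = GoFunctionIndexFromExitCode(ec) // == 3
}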

View File

@ -0,0 +1,216 @@
package wazevoapi
import (
"github.com/tetratelabs/wazero/internal/wasm"
)
const (
// FunctionInstanceSize is the size of wazevo.functionInstance.
FunctionInstanceSize = 24
// FunctionInstanceExecutableOffset is an offset of `executable` field in wazevo.functionInstance
FunctionInstanceExecutableOffset = 0
// FunctionInstanceModuleContextOpaquePtrOffset is an offset of `moduleContextOpaquePtr` field in wazevo.functionInstance
FunctionInstanceModuleContextOpaquePtrOffset = 8
// FunctionInstanceTypeIDOffset is an offset of `typeID` field in wazevo.functionInstance
FunctionInstanceTypeIDOffset = 16
)
const (
// ExecutionContextOffsetExitCodeOffset is an offset of `exitCode` field in wazevo.executionContext
ExecutionContextOffsetExitCodeOffset Offset = 0
// ExecutionContextOffsetCallerModuleContextPtr is an offset of `callerModuleContextPtr` field in wazevo.executionContext
ExecutionContextOffsetCallerModuleContextPtr Offset = 8
// ExecutionContextOffsetOriginalFramePointer is an offset of `originalFramePointer` field in wazevo.executionContext
ExecutionContextOffsetOriginalFramePointer Offset = 16
// ExecutionContextOffsetOriginalStackPointer is an offset of `originalStackPointer` field in wazevo.executionContext
ExecutionContextOffsetOriginalStackPointer Offset = 24
// ExecutionContextOffsetGoReturnAddress is an offset of `goReturnAddress` field in wazevo.executionContext
ExecutionContextOffsetGoReturnAddress Offset = 32
// ExecutionContextOffsetStackBottomPtr is an offset of `stackBottomPtr` field in wazevo.executionContext
ExecutionContextOffsetStackBottomPtr Offset = 40
// ExecutionContextOffsetGoCallReturnAddress is an offset of `goCallReturnAddress` field in wazevo.executionContext
ExecutionContextOffsetGoCallReturnAddress Offset = 48
// ExecutionContextOffsetStackPointerBeforeGoCall is an offset of `StackPointerBeforeGoCall` field in wazevo.executionContext
ExecutionContextOffsetStackPointerBeforeGoCall Offset = 56
// ExecutionContextOffsetStackGrowRequiredSize is an offset of `stackGrowRequiredSize` field in wazevo.executionContext
ExecutionContextOffsetStackGrowRequiredSize Offset = 64
// ExecutionContextOffsetMemoryGrowTrampolineAddress is an offset of `memoryGrowTrampolineAddress` field in wazevo.executionContext
ExecutionContextOffsetMemoryGrowTrampolineAddress Offset = 72
// ExecutionContextOffsetStackGrowCallTrampolineAddress is an offset of `stackGrowCallTrampolineAddress` field in wazevo.executionContext.
ExecutionContextOffsetStackGrowCallTrampolineAddress Offset = 80
// ExecutionContextOffsetCheckModuleExitCodeTrampolineAddress is an offset of `checkModuleExitCodeTrampolineAddress` field in wazevo.executionContext.
ExecutionContextOffsetCheckModuleExitCodeTrampolineAddress Offset = 88
// ExecutionContextOffsetSavedRegistersBegin is an offset of the first element of `savedRegisters` field in wazevo.executionContext
ExecutionContextOffsetSavedRegistersBegin Offset = 96
// ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque is an offset of `goFunctionCallCalleeModuleContextOpaque` field in wazevo.executionContext
ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque Offset = 1120
// ExecutionContextOffsetTableGrowTrampolineAddress is an offset of `tableGrowTrampolineAddress` field in wazevo.executionContext
ExecutionContextOffsetTableGrowTrampolineAddress Offset = 1128
// ExecutionContextOffsetRefFuncTrampolineAddress is an offset of `refFuncTrampolineAddress` field in wazevo.executionContext
ExecutionContextOffsetRefFuncTrampolineAddress Offset = 1136
ExecutionContextOffsetMemmoveAddress Offset = 1144
ExecutionContextOffsetFramePointerBeforeGoCall Offset = 1152
ExecutionContextOffsetMemoryWait32TrampolineAddress Offset = 1160
ExecutionContextOffsetMemoryWait64TrampolineAddress Offset = 1168
ExecutionContextOffsetMemoryNotifyTrampolineAddress Offset = 1176
)
// ModuleContextOffsetData allows the compilers to obtain the offsets of the fields of wazevo.moduleContextOpaque.
// This is unique per module.
type ModuleContextOffsetData struct {
TotalSize int
ModuleInstanceOffset,
LocalMemoryBegin,
ImportedMemoryBegin,
ImportedFunctionsBegin,
GlobalsBegin,
TypeIDs1stElement,
TablesBegin,
BeforeListenerTrampolines1stElement,
AfterListenerTrampolines1stElement,
DataInstances1stElement,
ElementInstances1stElement Offset
}
// ImportedFunctionOffset returns an offset of the i-th imported function.
// Each item is stored as wazevo.functionInstance whose size matches FunctionInstanceSize.
func (m *ModuleContextOffsetData) ImportedFunctionOffset(i wasm.Index) (
executableOffset, moduleCtxOffset, typeIDOffset Offset,
) {
base := m.ImportedFunctionsBegin + Offset(i)*FunctionInstanceSize
return base, base + 8, base + 16
}
// GlobalInstanceOffset returns an offset of the i-th global instance.
func (m *ModuleContextOffsetData) GlobalInstanceOffset(i wasm.Index) Offset {
return m.GlobalsBegin + Offset(i)*16
}
// Offset represents an offset of a field of a struct.
type Offset int32
// U32 encodes an Offset as uint32 for convenience.
func (o Offset) U32() uint32 {
return uint32(o)
}
// I64 encodes an Offset as int64 for convenience.
func (o Offset) I64() int64 {
return int64(o)
}
// U64 encodes an Offset as uint64 for convenience.
func (o Offset) U64() uint64 {
return uint64(o)
}
// LocalMemoryBase returns an offset of the first byte of the local memory.
func (m *ModuleContextOffsetData) LocalMemoryBase() Offset {
return m.LocalMemoryBegin
}
// LocalMemoryLen returns an offset of the length of the local memory buffer.
func (m *ModuleContextOffsetData) LocalMemoryLen() Offset {
if l := m.LocalMemoryBegin; l >= 0 {
return l + 8
}
return -1
}
// TableOffset returns an offset of the i-th table instance.
func (m *ModuleContextOffsetData) TableOffset(tableIndex int) Offset {
return m.TablesBegin + Offset(tableIndex)*8
}
// NewModuleContextOffsetData creates a ModuleContextOffsetData determining the structure of moduleContextOpaque for the given Module.
// The structure is described in the comment of wazevo.moduleContextOpaque.
func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContextOffsetData {
ret := ModuleContextOffsetData{}
var offset Offset
ret.ModuleInstanceOffset = 0
offset += 8
if m.MemorySection != nil {
ret.LocalMemoryBegin = offset
// buffer base + memory size.
const localMemorySizeInOpaqueModuleContext = 16
offset += localMemorySizeInOpaqueModuleContext
} else {
// Indicates that there's no local memory
ret.LocalMemoryBegin = -1
}
if m.ImportMemoryCount > 0 {
offset = align8(offset)
// *wasm.MemoryInstance + imported memory's owner (moduleContextOpaque)
const importedMemorySizeInOpaqueModuleContext = 16
ret.ImportedMemoryBegin = offset
offset += importedMemorySizeInOpaqueModuleContext
} else {
// Indicates that there's no imported memory
ret.ImportedMemoryBegin = -1
}
if m.ImportFunctionCount > 0 {
offset = align8(offset)
ret.ImportedFunctionsBegin = offset
// Each imported function is stored as a wazevo.functionInstance.
size := int(m.ImportFunctionCount) * FunctionInstanceSize
offset += Offset(size)
} else {
ret.ImportedFunctionsBegin = -1
}
if globals := int(m.ImportGlobalCount) + len(m.GlobalSection); globals > 0 {
// Align to 16 bytes for globals, as f32/f64/v128 might be loaded via SIMD instructions.
offset = align16(offset)
ret.GlobalsBegin = offset
// Pointers to *wasm.GlobalInstance.
offset += Offset(globals) * 16
} else {
ret.GlobalsBegin = -1
}
if tables := len(m.TableSection) + int(m.ImportTableCount); tables > 0 {
offset = align8(offset)
ret.TypeIDs1stElement = offset
offset += 8 // First element of TypeIDs.
ret.TablesBegin = offset
// Pointers to *wasm.TableInstance.
offset += Offset(tables) * 8
} else {
ret.TypeIDs1stElement = -1
ret.TablesBegin = -1
}
if withListener {
offset = align8(offset)
ret.BeforeListenerTrampolines1stElement = offset
offset += 8 // First element of BeforeListenerTrampolines.
ret.AfterListenerTrampolines1stElement = offset
offset += 8 // First element of AfterListenerTrampolines.
} else {
ret.BeforeListenerTrampolines1stElement = -1
ret.AfterListenerTrampolines1stElement = -1
}
ret.DataInstances1stElement = offset
offset += 8 // First element of DataInstances.
ret.ElementInstances1stElement = offset
offset += 8 // First element of ElementInstances.
ret.TotalSize = int(align16(offset))
return ret
}
func align16(o Offset) Offset {
return (o + 15) &^ 15
}
func align8(o Offset) Offset {
return (o + 7) &^ 7
}
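// exampleOffsets is an illustrative sketch (not part of the original file) of the arithmetic used
// above: align8/align16 round an offset up to the next multiple of 8/16, and each imported function
// occupies FunctionInstanceSize bytes starting at ImportedFunctionsBegin. The ImportedFunctionsBegin
// value below is hypothetical.
func exampleOffsets() {
_ = align8(9) // == 16
_ = align16(17) // == 32
m := ModuleContextOffsetData{ImportedFunctionsBegin: 8}
// The imported function at index 1 starts at 8 + 1*24 = 32; its moduleContextOpaquePtr and
// typeID fields follow at +8 and +16.
executable, moduleCtx, typeID := m.ImportedFunctionOffset(1)
_, _, _ = executable, moduleCtx, typeID // 32, 40, 48
}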

View File

@ -0,0 +1,96 @@
package wazevoapi
import (
"fmt"
"os"
"strconv"
"sync"
)
var PerfMap *Perfmap
func init() {
if PerfMapEnabled {
pid := os.Getpid()
filename := "/tmp/perf-" + strconv.Itoa(pid) + ".map"
fh, err := os.OpenFile(filename, os.O_APPEND|os.O_RDWR|os.O_CREATE, 0o644)
if err != nil {
panic(err)
}
PerfMap = &Perfmap{fh: fh}
}
}
// Perfmap holds perfmap entries to be flushed into a perfmap file.
type Perfmap struct {
entries []entry
mux sync.Mutex
fh *os.File
}
type entry struct {
index int
offset int64
size uint64
name string
}
func (f *Perfmap) Lock() {
f.mux.Lock()
}
func (f *Perfmap) Unlock() {
f.mux.Unlock()
}
// AddModuleEntry records a perfmap entry to be flushed into the perfmap file later.
// index is the index of the function in the module, offset is the offset of the function in the module,
// size is the size of the function, and name is the name of the function.
//
// Note that the entries are not flushed into the perfmap file until Flush is called,
// and the entries are module-scoped; Perfmap must be locked until Flush is called.
func (f *Perfmap) AddModuleEntry(index int, offset int64, size uint64, name string) {
e := entry{index: index, offset: offset, size: size, name: name}
if f.entries == nil {
f.entries = []entry{e}
return
}
f.entries = append(f.entries, e)
}
// Flush writes the buffered perfmap entries into the perfmap file, adjusting each entry's address by the given `addr` and `functionOffsets`.
func (f *Perfmap) Flush(addr uintptr, functionOffsets []int) {
defer func() {
_ = f.fh.Sync()
}()
for _, e := range f.entries {
if _, err := f.fh.WriteString(fmt.Sprintf("%x %s %s\n",
uintptr(e.offset)+addr+uintptr(functionOffsets[e.index]),
strconv.FormatUint(e.size, 16),
e.name,
)); err != nil {
panic(err)
}
}
f.entries = f.entries[:0]
}
// Clear clears the perfmap entries not yet flushed.
func (f *Perfmap) Clear() {
f.entries = f.entries[:0]
}
// AddEntry writes a perfmap entry directly into the perfmap file, bypassing the buffered entries.
func (f *Perfmap) AddEntry(addr uintptr, size uint64, name string) {
_, err := f.fh.WriteString(fmt.Sprintf("%x %s %s\n",
addr,
strconv.FormatUint(size, 16),
name,
))
if err != nil {
panic(err)
}
}
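// examplePerfmapUsage is an illustrative sketch (not part of the original file) of the intended
// module-scoped flow: hold the lock while accumulating entries for one module, then flush them once
// the executable address and per-function offsets are known. The addr and functionOffsets values are
// hypothetical caller-provided inputs; functionOffsets must contain an offset for each added index.
func examplePerfmapUsage(addr uintptr, functionOffsets []int) {
if !PerfMapEnabled {
return // PerfMap is only initialized when built with the perfmap build tag.
}
PerfMap.Lock()
defer PerfMap.Unlock()
PerfMap.AddModuleEntry(0, 0, 128, "wasm function [0]")
PerfMap.AddModuleEntry(1, 0, 64, "wasm function [1]")
PerfMap.Flush(addr, functionOffsets)
}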

View File

@ -0,0 +1,5 @@
//go:build !perfmap
package wazevoapi
const PerfMapEnabled = false

View File

@ -0,0 +1,5 @@
//go:build perfmap
package wazevoapi
const PerfMapEnabled = true

View File

@ -0,0 +1,215 @@
package wazevoapi
const poolPageSize = 128
// Pool is a pool of T that can be allocated and reset.
// This is useful to avoid unnecessary allocations.
type Pool[T any] struct {
pages []*[poolPageSize]T
resetFn func(*T)
allocated, index int
}
// NewPool returns a new Pool.
// resetFn is called when a new T is allocated in Pool.Allocate.
func NewPool[T any](resetFn func(*T)) Pool[T] {
var ret Pool[T]
ret.resetFn = resetFn
ret.Reset()
return ret
}
// Allocated returns the number of allocated T currently in the pool.
func (p *Pool[T]) Allocated() int {
return p.allocated
}
// Allocate allocates a new T from the pool.
func (p *Pool[T]) Allocate() *T {
if p.index == poolPageSize {
if len(p.pages) == cap(p.pages) {
p.pages = append(p.pages, new([poolPageSize]T))
} else {
i := len(p.pages)
p.pages = p.pages[:i+1]
if p.pages[i] == nil {
p.pages[i] = new([poolPageSize]T)
}
}
p.index = 0
}
ret := &p.pages[len(p.pages)-1][p.index]
if p.resetFn != nil {
p.resetFn(ret)
}
p.index++
p.allocated++
return ret
}
// View returns a pointer to the i-th item in the pool.
func (p *Pool[T]) View(i int) *T {
page, index := i/poolPageSize, i%poolPageSize
return &p.pages[page][index]
}
// Reset resets the pool.
func (p *Pool[T]) Reset() {
p.pages = p.pages[:0]
p.index = poolPageSize
p.allocated = 0
}
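// examplePoolUsage is an illustrative sketch (not part of the original file): items allocated from
// the pool are reset by the supplied function, and Reset keeps the backing pages around so later
// allocations reuse them.
func examplePoolUsage() {
pool := NewPool[int](func(p *int) { *p = 0 })
a := pool.Allocate() // zeroed by the reset function
*a = 42
_ = pool.Allocated() // == 1
_ = pool.View(0) // same pointer as a
pool.Reset() // keeps the backing pages for reuse by the next Allocate calls
}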
// IDedPool is a pool of T that can be allocated and reset, with a way to get T by an ID.
type IDedPool[T any] struct {
pool Pool[T]
idToItems []*T
maxIDEncountered int
}
// NewIDedPool returns a new IDedPool.
func NewIDedPool[T any](resetFn func(*T)) IDedPool[T] {
return IDedPool[T]{pool: NewPool[T](resetFn)}
}
// GetOrAllocate returns the T for the given id, allocating it from the pool if it has not been allocated yet.
func (p *IDedPool[T]) GetOrAllocate(id int) *T {
if p.maxIDEncountered < id {
p.maxIDEncountered = id
}
if id >= len(p.idToItems) {
p.idToItems = append(p.idToItems, make([]*T, id-len(p.idToItems)+1)...)
}
if p.idToItems[id] == nil {
p.idToItems[id] = p.pool.Allocate()
}
return p.idToItems[id]
}
// Get returns the T with the given id, or nil if it's not allocated.
func (p *IDedPool[T]) Get(id int) *T {
if id >= len(p.idToItems) {
return nil
}
return p.idToItems[id]
}
// Reset resets the pool.
func (p *IDedPool[T]) Reset() {
p.pool.Reset()
for i := range p.idToItems {
p.idToItems[i] = nil
}
p.maxIDEncountered = -1
}
// MaxIDEncountered returns the maximum id encountered so far.
func (p *IDedPool[T]) MaxIDEncountered() int {
return p.maxIDEncountered
}
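// exampleIDedPoolUsage is an illustrative sketch (not part of the original file): items are looked
// up, and lazily allocated, by a caller-chosen id.
func exampleIDedPoolUsage() {
pool := NewIDedPool[int](func(p *int) { *p = 0 })
x := pool.GetOrAllocate(10)
*x = 7
_ = pool.Get(10) // same pointer as x
_ = pool.Get(3) // nil: never allocated
_ = pool.MaxIDEncountered() // == 10
pool.Reset()
}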
// arraySize is the size of the array used in VarLengthPool's arrayPool.
// This is chosen to be 8, which is empirically a good number among 8, 12, 16 and 20.
const arraySize = 8
// VarLengthPool is a pool of VarLength[T] that can be allocated and reset.
type (
VarLengthPool[T any] struct {
arrayPool Pool[varLengthPoolArray[T]]
slicePool Pool[[]T]
}
// varLengthPoolArray wraps an array and keeps track of the next index to be used, avoiding heap allocation.
varLengthPoolArray[T any] struct {
arr [arraySize]T
next int
}
)
// VarLength is a variable length array that can be reused via a pool.
type VarLength[T any] struct {
arr *varLengthPoolArray[T]
slc *[]T
}
// NewVarLengthPool returns a new VarLengthPool.
func NewVarLengthPool[T any]() VarLengthPool[T] {
return VarLengthPool[T]{
arrayPool: NewPool[varLengthPoolArray[T]](func(v *varLengthPoolArray[T]) {
v.next = 0
}),
slicePool: NewPool[[]T](func(i *[]T) {
*i = (*i)[:0]
}),
}
}
// NewNilVarLength returns a new VarLength[T] with a nil backing.
func NewNilVarLength[T any]() VarLength[T] {
return VarLength[T]{}
}
// Allocate allocates a new VarLength[T] from the pool.
func (p *VarLengthPool[T]) Allocate(knownMin int) VarLength[T] {
if knownMin <= arraySize {
arr := p.arrayPool.Allocate()
return VarLength[T]{arr: arr}
}
slc := p.slicePool.Allocate()
return VarLength[T]{slc: slc}
}
// Reset resets the pool.
func (p *VarLengthPool[T]) Reset() {
p.arrayPool.Reset()
p.slicePool.Reset()
}
// Append appends items to the backing slice just like the `append` builtin function in Go.
func (i VarLength[T]) Append(p *VarLengthPool[T], items ...T) VarLength[T] {
if i.slc != nil {
*i.slc = append(*i.slc, items...)
return i
}
if i.arr == nil {
i.arr = p.arrayPool.Allocate()
}
arr := i.arr
if arr.next+len(items) <= arraySize {
for _, item := range items {
arr.arr[arr.next] = item
arr.next++
}
} else {
slc := p.slicePool.Allocate()
// Copy the array to the slice.
for ptr := 0; ptr < arr.next; ptr++ {
*slc = append(*slc, arr.arr[ptr])
}
i.slc = slc
*i.slc = append(*i.slc, items...)
}
return i
}
// View returns the backing slice.
func (i VarLength[T]) View() []T {
if i.slc != nil {
return *i.slc
} else if i.arr != nil {
arr := i.arr
return arr.arr[:arr.next]
}
return nil
}
// Cut cuts the backing slice to the given length.
// Precondition: n <= the current length of the backing storage.
func (i VarLength[T]) Cut(n int) {
if i.slc != nil {
*i.slc = (*i.slc)[:n]
} else if i.arr != nil {
i.arr.next = n
}
}
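// exampleVarLengthUsage is an illustrative sketch (not part of the original file): small lists live
// in a pooled fixed-size array and spill into a pooled slice only once they outgrow arraySize elements.
func exampleVarLengthUsage() {
pool := NewVarLengthPool[int]()
vs := pool.Allocate(2) // 2 <= arraySize, so this is array-backed
vs = vs.Append(&pool, 1, 2, 3)
_ = vs.View() // []int{1, 2, 3}
vs.Cut(1)
_ = vs.View() // []int{1}
pool.Reset() // invalidates vs; all backing storage is reused
}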

View File

@ -0,0 +1,15 @@
package wazevoapi
import "unsafe"
// PtrFromUintptr resurrects the original *T from the given uintptr.
// The caller of this function MUST be sure that ptr is valid.
func PtrFromUintptr[T any](ptr uintptr) *T {
// Wrap ptr in a double pointer in order to avoid the unsafe access being flagged by the race detector.
//
// For example, if we had (*function)(unsafe.Pointer(ptr)) instead, the race detector's "checkptr"
// subroutine would warn with "checkptr: pointer arithmetic result points to invalid allocation"
// https://github.com/golang/go/blob/1ce7fcf139417d618c2730010ede2afb41664211/src/runtime/checkptr.go#L69
var wrapped *uintptr = &ptr
return *(**T)(unsafe.Pointer(wrapped))
}
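// examplePtrFromUintptr is an illustrative sketch (not part of the original file): a pointer
// round-trips through uintptr. The original object must be kept reachable by the caller for the
// whole time the uintptr is in use.
func examplePtrFromUintptr() {
x := 42
raw := uintptr(unsafe.Pointer(&x))
p := PtrFromUintptr[int](raw)
_ = *p // == 42, as long as x is still alive here
}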

View File

@ -0,0 +1,26 @@
package wazevoapi
// Queue is a resettable FIFO queue whose underlying slice is reused across resets.
type Queue[T any] struct {
index int
Data []T
}
func (q *Queue[T]) Enqueue(v T) {
q.Data = append(q.Data, v)
}
func (q *Queue[T]) Dequeue() (ret T) {
ret = q.Data[q.index]
q.index++
return
}
func (q *Queue[T]) Empty() bool {
return q.index >= len(q.Data)
}
func (q *Queue[T]) Reset() {
q.index = 0
q.Data = q.Data[:0]
}
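// exampleQueueUsage is an illustrative sketch (not part of the original file): a FIFO whose backing
// slice survives Reset so subsequent uses do not reallocate.
func exampleQueueUsage() {
var q Queue[int]
q.Enqueue(1)
q.Enqueue(2)
_ = q.Dequeue() // == 1
_ = q.Dequeue() // == 2
_ = q.Empty() // true
q.Reset() // reuses the underlying slice for the next round
}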

View File

@ -0,0 +1,13 @@
package wazevoapi
// ResetMap resets the map to an empty state, or creates a new map if it is nil.
func ResetMap[K comparable, V any](m map[K]V) map[K]V {
if m == nil {
m = make(map[K]V)
} else {
for k := range m {
delete(m, k)
}
}
return m
}
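// exampleResetMap is an illustrative sketch (not part of the original file): the same map value is
// cleared and handed back, so once it exists no reallocation happens on reset.
func exampleResetMap() {
var m map[string]int
m = ResetMap(m) // nil input: a fresh map is created
m["a"] = 1
m = ResetMap(m) // non-nil input: entries are deleted, the allocation is kept
_ = len(m) // == 0
}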

Some files were not shown because too many files have changed in this diff.