Mirror of https://github.com/superseriousbusiness/gotosocial (synced 2025-06-05 21:59:39 +02:00)
[experiment] add alternative wasm sqlite3 implementation available via build-tag (#2863)
This allows for building GoToSocial with [SQLite transpiled to WASM](https://github.com/ncruces/go-sqlite3) and accessed through [Wazero](https://wazero.io/).
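Building with the alternative backend is a matter of passing the new tag to `go build -tags ...`. The sketch below shows the general shape of such a build-tag switch; the tag name `wasmsqlite3`, the file name, the `db` package, and the `openDB` helper are illustrative placeholders rather than the exact names used in this change, while the two blank imports are the driver and embedded-WASM packages documented by the ncruces/go-sqlite3 project.

```go
// sqlite_wasm.go: hypothetical file name and build tag, for illustration only.
//go:build wasmsqlite3

package db

import (
	"database/sql"

	_ "github.com/ncruces/go-sqlite3/driver" // registers the "sqlite3" database/sql driver backed by WASM SQLite
	_ "github.com/ncruces/go-sqlite3/embed"  // embeds the prebuilt sqlite3 WASM binary, executed via Wazero
)

// openDB opens a SQLite database through the WASM-backed driver.
func openDB(path string) (*sql.DB, error) {
	return sql.Open("sqlite3", path)
}
```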
170 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/abi.go (generated, vendored, normal file)
@@ -0,0 +1,170 @@
package backend

import (
	"fmt"

	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

type (
	// FunctionABI represents the ABI information for a function which corresponds to a ssa.Signature.
	FunctionABI struct {
		Initialized bool

		Args, Rets                 []ABIArg
		ArgStackSize, RetStackSize int64

		ArgIntRealRegs   byte
		ArgFloatRealRegs byte
		RetIntRealRegs   byte
		RetFloatRealRegs byte
	}

	// ABIArg represents either argument or return value's location.
	ABIArg struct {
		// Index is the index of the argument.
		Index int
		// Kind is the kind of the argument.
		Kind ABIArgKind
		// Reg is valid if Kind == ABIArgKindReg.
		// This VReg must be based on RealReg.
		Reg regalloc.VReg
		// Offset is valid if Kind == ABIArgKindStack.
		// This is the offset from the beginning of either arg or ret stack slot.
		Offset int64
		// Type is the type of the argument.
		Type ssa.Type
	}

	// ABIArgKind is the kind of ABI argument.
	ABIArgKind byte
)

const (
	// ABIArgKindReg represents an argument passed in a register.
	ABIArgKindReg = iota
	// ABIArgKindStack represents an argument passed in the stack.
	ABIArgKindStack
)

// String implements fmt.Stringer.
func (a *ABIArg) String() string {
	return fmt.Sprintf("args[%d]: %s", a.Index, a.Kind)
}

// String implements fmt.Stringer.
func (a ABIArgKind) String() string {
	switch a {
	case ABIArgKindReg:
		return "reg"
	case ABIArgKindStack:
		return "stack"
	default:
		panic("BUG")
	}
}

// Init initializes the abiImpl for the given signature.
func (a *FunctionABI) Init(sig *ssa.Signature, argResultInts, argResultFloats []regalloc.RealReg) {
	if len(a.Rets) < len(sig.Results) {
		a.Rets = make([]ABIArg, len(sig.Results))
	}
	a.Rets = a.Rets[:len(sig.Results)]
	a.RetStackSize = a.setABIArgs(a.Rets, sig.Results, argResultInts, argResultFloats)
	if argsNum := len(sig.Params); len(a.Args) < argsNum {
		a.Args = make([]ABIArg, argsNum)
	}
	a.Args = a.Args[:len(sig.Params)]
	a.ArgStackSize = a.setABIArgs(a.Args, sig.Params, argResultInts, argResultFloats)

	// Gather the real registers usages in arg/return.
	a.ArgIntRealRegs, a.ArgFloatRealRegs = 0, 0
	a.RetIntRealRegs, a.RetFloatRealRegs = 0, 0
	for i := range a.Rets {
		r := &a.Rets[i]
		if r.Kind == ABIArgKindReg {
			if r.Type.IsInt() {
				a.RetIntRealRegs++
			} else {
				a.RetFloatRealRegs++
			}
		}
	}
	for i := range a.Args {
		arg := &a.Args[i]
		if arg.Kind == ABIArgKindReg {
			if arg.Type.IsInt() {
				a.ArgIntRealRegs++
			} else {
				a.ArgFloatRealRegs++
			}
		}
	}

	a.Initialized = true
}

// setABIArgs sets the ABI arguments in the given slice. This assumes that len(s) >= len(types)
// where if len(s) > len(types), the last elements of s is for the multi-return slot.
func (a *FunctionABI) setABIArgs(s []ABIArg, types []ssa.Type, ints, floats []regalloc.RealReg) (stackSize int64) {
	il, fl := len(ints), len(floats)

	var stackOffset int64
	intParamIndex, floatParamIndex := 0, 0
	for i, typ := range types {
		arg := &s[i]
		arg.Index = i
		arg.Type = typ
		if typ.IsInt() {
			if intParamIndex >= il {
				arg.Kind = ABIArgKindStack
				const slotSize = 8 // Align 8 bytes.
				arg.Offset = stackOffset
				stackOffset += slotSize
			} else {
				arg.Kind = ABIArgKindReg
				arg.Reg = regalloc.FromRealReg(ints[intParamIndex], regalloc.RegTypeInt)
				intParamIndex++
			}
		} else {
			if floatParamIndex >= fl {
				arg.Kind = ABIArgKindStack
				slotSize := int64(8) // Align at least 8 bytes.
				if typ.Bits() == 128 { // Vector.
					slotSize = 16
				}
				arg.Offset = stackOffset
				stackOffset += slotSize
			} else {
				arg.Kind = ABIArgKindReg
				arg.Reg = regalloc.FromRealReg(floats[floatParamIndex], regalloc.RegTypeFloat)
				floatParamIndex++
			}
		}
	}
	return stackOffset
}

func (a *FunctionABI) AlignedArgResultStackSlotSize() uint32 {
	stackSlotSize := a.RetStackSize + a.ArgStackSize
	// Align stackSlotSize to 16 bytes.
	stackSlotSize = (stackSlotSize + 15) &^ 15
	// Check overflow 32-bit.
	if stackSlotSize > 0xFFFFFFFF {
		panic("ABI stack slot size overflow")
	}
	return uint32(stackSlotSize)
}

func (a *FunctionABI) ABIInfoAsUint64() uint64 {
	return uint64(a.ArgIntRealRegs)<<56 |
		uint64(a.ArgFloatRealRegs)<<48 |
		uint64(a.RetIntRealRegs)<<40 |
		uint64(a.RetFloatRealRegs)<<32 |
		uint64(a.AlignedArgResultStackSlotSize())
}

func ABIInfoFromUint64(info uint64) (argIntRealRegs, argFloatRealRegs, retIntRealRegs, retFloatRealRegs byte, stackSlotSize uint32) {
	return byte(info >> 56), byte(info >> 48), byte(info >> 40), byte(info >> 32), uint32(info)
}
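The last two functions pack the ABI summary into a single uint64: four register counts in the top four bytes and the 16-byte-aligned arg/result stack slot size in the low 32 bits. A standalone round-trip sketch of that bit layout follows; it re-implements the packing rather than importing wazero's internal packages (which are not importable from outside that module), and the function names are mine.

```go
package main

import "fmt"

// packABIInfo mirrors FunctionABI.ABIInfoAsUint64 above.
func packABIInfo(argInts, argFloats, retInts, retFloats byte, stackSlotSize uint32) uint64 {
	return uint64(argInts)<<56 |
		uint64(argFloats)<<48 |
		uint64(retInts)<<40 |
		uint64(retFloats)<<32 |
		uint64(stackSlotSize)
}

// unpackABIInfo mirrors ABIInfoFromUint64 above.
func unpackABIInfo(info uint64) (argInts, argFloats, retInts, retFloats byte, stackSlotSize uint32) {
	return byte(info >> 56), byte(info >> 48), byte(info >> 40), byte(info >> 32), uint32(info)
}

func main() {
	info := packABIInfo(2, 1, 1, 0, 32)
	fmt.Println(unpackABIInfo(info)) // 2 1 1 0 32
}
```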
3 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/backend.go (generated, vendored, normal file)
@@ -0,0 +1,3 @@
// Package backend must be free of Wasm-specific concept. In other words,
// this package must not import internal/wasm package.
package backend
417 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go (generated, vendored, normal file)
@@ -0,0 +1,417 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
// NewCompiler returns a new Compiler that can generate a machine code.
|
||||
func NewCompiler(ctx context.Context, mach Machine, builder ssa.Builder) Compiler {
|
||||
return newCompiler(ctx, mach, builder)
|
||||
}
|
||||
|
||||
func newCompiler(_ context.Context, mach Machine, builder ssa.Builder) *compiler {
|
||||
argResultInts, argResultFloats := mach.ArgsResultsRegs()
|
||||
c := &compiler{
|
||||
mach: mach, ssaBuilder: builder,
|
||||
nextVRegID: regalloc.VRegIDNonReservedBegin,
|
||||
argResultInts: argResultInts,
|
||||
argResultFloats: argResultFloats,
|
||||
}
|
||||
mach.SetCompiler(c)
|
||||
return c
|
||||
}
|
||||
|
||||
// Compiler is the backend of wazevo which takes ssa.Builder and Machine,
|
||||
// use the information there to emit the final machine code.
|
||||
type Compiler interface {
|
||||
// SSABuilder returns the ssa.Builder used by this compiler.
|
||||
SSABuilder() ssa.Builder
|
||||
|
||||
// Compile executes the following steps:
|
||||
// 1. Lower()
|
||||
// 2. RegAlloc()
|
||||
// 3. Finalize()
|
||||
// 4. Encode()
|
||||
//
|
||||
// Each step can be called individually for testing purpose, therefore they are exposed in this interface too.
|
||||
//
|
||||
// The returned byte slices are the machine code and the relocation information for the machine code.
|
||||
// The caller is responsible for copying them immediately since the compiler may reuse the buffer.
|
||||
Compile(ctx context.Context) (_ []byte, _ []RelocationInfo, _ error)
|
||||
|
||||
// Lower lowers the given ssa.Instruction to the machine-specific instructions.
|
||||
Lower()
|
||||
|
||||
// RegAlloc performs the register allocation after Lower is called.
|
||||
RegAlloc()
|
||||
|
||||
// Finalize performs the finalization of the compilation, including machine code emission.
|
||||
// This must be called after RegAlloc.
|
||||
Finalize(ctx context.Context) error
|
||||
|
||||
// Buf returns the buffer of the encoded machine code. This is only used for testing purpose.
|
||||
Buf() []byte
|
||||
|
||||
BufPtr() *[]byte
|
||||
|
||||
// Format returns the debug string of the current state of the compiler.
|
||||
Format() string
|
||||
|
||||
// Init initializes the internal state of the compiler for the next compilation.
|
||||
Init()
|
||||
|
||||
// AllocateVReg allocates a new virtual register of the given type.
|
||||
AllocateVReg(typ ssa.Type) regalloc.VReg
|
||||
|
||||
// ValueDefinition returns the definition of the given value.
|
||||
ValueDefinition(ssa.Value) *SSAValueDefinition
|
||||
|
||||
// VRegOf returns the virtual register of the given ssa.Value.
|
||||
VRegOf(value ssa.Value) regalloc.VReg
|
||||
|
||||
// TypeOf returns the ssa.Type of the given virtual register.
|
||||
TypeOf(regalloc.VReg) ssa.Type
|
||||
|
||||
// MatchInstr returns true if the given definition is from an instruction with the given opcode, the current group ID,
|
||||
// and a refcount of 1. That means, the instruction can be merged/swapped within the current instruction group.
|
||||
MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool
|
||||
|
||||
// MatchInstrOneOf is the same as MatchInstr but for multiple opcodes. If it matches one of ssa.Opcode,
|
||||
// this returns the opcode. Otherwise, this returns ssa.OpcodeInvalid.
|
||||
//
|
||||
// Note: caller should be careful to avoid excessive allocation on opcodes slice.
|
||||
MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode
|
||||
|
||||
// AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.
|
||||
AddRelocationInfo(funcRef ssa.FuncRef)
|
||||
|
||||
// AddSourceOffsetInfo appends the source offset information for the given offset.
|
||||
AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset)
|
||||
|
||||
// SourceOffsetInfo returns the source offset information for the current buffer offset.
|
||||
SourceOffsetInfo() []SourceOffsetInfo
|
||||
|
||||
// EmitByte appends a byte to the buffer. Used during the code emission.
|
||||
EmitByte(b byte)
|
||||
|
||||
// Emit4Bytes appends 4 bytes to the buffer. Used during the code emission.
|
||||
Emit4Bytes(b uint32)
|
||||
|
||||
// Emit8Bytes appends 8 bytes to the buffer. Used during the code emission.
|
||||
Emit8Bytes(b uint64)
|
||||
|
||||
// GetFunctionABI returns the ABI information for the given signature.
|
||||
GetFunctionABI(sig *ssa.Signature) *FunctionABI
|
||||
}
|
||||
|
||||
// RelocationInfo represents the relocation information for a call instruction.
|
||||
type RelocationInfo struct {
|
||||
// Offset represents the offset from the beginning of the machine code of either a function or the entire module.
|
||||
Offset int64
|
||||
// Target is the target function of the call instruction.
|
||||
FuncRef ssa.FuncRef
|
||||
}
|
||||
|
||||
// compiler implements Compiler.
|
||||
type compiler struct {
|
||||
mach Machine
|
||||
currentGID ssa.InstructionGroupID
|
||||
ssaBuilder ssa.Builder
|
||||
// nextVRegID is the next virtual register ID to be allocated.
|
||||
nextVRegID regalloc.VRegID
|
||||
// ssaValueToVRegs maps ssa.ValueID to regalloc.VReg.
|
||||
ssaValueToVRegs [] /* VRegID to */ regalloc.VReg
|
||||
// ssaValueDefinitions maps ssa.ValueID to its definition.
|
||||
ssaValueDefinitions []SSAValueDefinition
|
||||
// ssaValueRefCounts is a cached list obtained by ssa.Builder.ValueRefCounts().
|
||||
ssaValueRefCounts []int
|
||||
// returnVRegs is the list of virtual registers that store the return values.
|
||||
returnVRegs []regalloc.VReg
|
||||
varEdges [][2]regalloc.VReg
|
||||
varEdgeTypes []ssa.Type
|
||||
constEdges []struct {
|
||||
cInst *ssa.Instruction
|
||||
dst regalloc.VReg
|
||||
}
|
||||
vRegSet []bool
|
||||
vRegIDs []regalloc.VRegID
|
||||
tempRegs []regalloc.VReg
|
||||
tmpVals []ssa.Value
|
||||
ssaTypeOfVRegID [] /* VRegID to */ ssa.Type
|
||||
buf []byte
|
||||
relocations []RelocationInfo
|
||||
sourceOffsets []SourceOffsetInfo
|
||||
// abis maps ssa.SignatureID to the ABI implementation.
|
||||
abis []FunctionABI
|
||||
argResultInts, argResultFloats []regalloc.RealReg
|
||||
}
|
||||
|
||||
// SourceOffsetInfo is data that associates a source offset with an executable offset.
|
||||
type SourceOffsetInfo struct {
|
||||
// SourceOffset is the source offset in the original source code.
|
||||
SourceOffset ssa.SourceOffset
|
||||
// ExecutableOffset is the offset in the compiled executable.
|
||||
ExecutableOffset int64
|
||||
}
|
||||
|
||||
// Compile implements Compiler.Compile.
|
||||
func (c *compiler) Compile(ctx context.Context) ([]byte, []RelocationInfo, error) {
|
||||
c.Lower()
|
||||
if wazevoapi.PrintSSAToBackendIRLowering && wazevoapi.PrintEnabledIndex(ctx) {
|
||||
fmt.Printf("[[[after lowering for %s ]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
|
||||
}
|
||||
if wazevoapi.DeterministicCompilationVerifierEnabled {
|
||||
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After lowering to ISA specific IR", c.Format())
|
||||
}
|
||||
c.RegAlloc()
|
||||
if wazevoapi.PrintRegisterAllocated && wazevoapi.PrintEnabledIndex(ctx) {
|
||||
fmt.Printf("[[[after regalloc for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
|
||||
}
|
||||
if wazevoapi.DeterministicCompilationVerifierEnabled {
|
||||
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Register Allocation", c.Format())
|
||||
}
|
||||
if err := c.Finalize(ctx); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if wazevoapi.PrintFinalizedMachineCode && wazevoapi.PrintEnabledIndex(ctx) {
|
||||
fmt.Printf("[[[after finalize for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
|
||||
}
|
||||
if wazevoapi.DeterministicCompilationVerifierEnabled {
|
||||
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Finalization", c.Format())
|
||||
}
|
||||
return c.buf, c.relocations, nil
|
||||
}
|
||||
|
||||
// RegAlloc implements Compiler.RegAlloc.
|
||||
func (c *compiler) RegAlloc() {
|
||||
c.mach.RegAlloc()
|
||||
}
|
||||
|
||||
// Finalize implements Compiler.Finalize.
|
||||
func (c *compiler) Finalize(ctx context.Context) error {
|
||||
c.mach.PostRegAlloc()
|
||||
return c.mach.Encode(ctx)
|
||||
}
|
||||
|
||||
// setCurrentGroupID sets the current instruction group ID.
|
||||
func (c *compiler) setCurrentGroupID(gid ssa.InstructionGroupID) {
|
||||
c.currentGID = gid
|
||||
}
|
||||
|
||||
// assignVirtualRegisters assigns a virtual register to each ssa.ValueID Valid in the ssa.Builder.
|
||||
func (c *compiler) assignVirtualRegisters() {
|
||||
builder := c.ssaBuilder
|
||||
refCounts := builder.ValueRefCounts()
|
||||
c.ssaValueRefCounts = refCounts
|
||||
|
||||
need := len(refCounts)
|
||||
if need >= len(c.ssaValueToVRegs) {
|
||||
c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, need+1)...)
|
||||
}
|
||||
if need >= len(c.ssaValueDefinitions) {
|
||||
c.ssaValueDefinitions = append(c.ssaValueDefinitions, make([]SSAValueDefinition, need+1)...)
|
||||
}
|
||||
|
||||
for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() {
|
||||
// First we assign a virtual register to each parameter.
|
||||
for i := 0; i < blk.Params(); i++ {
|
||||
p := blk.Param(i)
|
||||
pid := p.ID()
|
||||
typ := p.Type()
|
||||
vreg := c.AllocateVReg(typ)
|
||||
c.ssaValueToVRegs[pid] = vreg
|
||||
c.ssaValueDefinitions[pid] = SSAValueDefinition{BlockParamValue: p, BlkParamVReg: vreg}
|
||||
c.ssaTypeOfVRegID[vreg.ID()] = p.Type()
|
||||
}
|
||||
|
||||
// Assigns each value to a virtual register produced by instructions.
|
||||
for cur := blk.Root(); cur != nil; cur = cur.Next() {
|
||||
r, rs := cur.Returns()
|
||||
var N int
|
||||
if r.Valid() {
|
||||
id := r.ID()
|
||||
ssaTyp := r.Type()
|
||||
typ := r.Type()
|
||||
vReg := c.AllocateVReg(typ)
|
||||
c.ssaValueToVRegs[id] = vReg
|
||||
c.ssaValueDefinitions[id] = SSAValueDefinition{
|
||||
Instr: cur,
|
||||
N: 0,
|
||||
RefCount: refCounts[id],
|
||||
}
|
||||
c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp
|
||||
N++
|
||||
}
|
||||
for _, r := range rs {
|
||||
id := r.ID()
|
||||
ssaTyp := r.Type()
|
||||
vReg := c.AllocateVReg(ssaTyp)
|
||||
c.ssaValueToVRegs[id] = vReg
|
||||
c.ssaValueDefinitions[id] = SSAValueDefinition{
|
||||
Instr: cur,
|
||||
N: N,
|
||||
RefCount: refCounts[id],
|
||||
}
|
||||
c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp
|
||||
N++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for i, retBlk := 0, builder.ReturnBlock(); i < retBlk.Params(); i++ {
|
||||
typ := retBlk.Param(i).Type()
|
||||
vReg := c.AllocateVReg(typ)
|
||||
c.returnVRegs = append(c.returnVRegs, vReg)
|
||||
c.ssaTypeOfVRegID[vReg.ID()] = typ
|
||||
}
|
||||
}
|
||||
|
||||
// AllocateVReg implements Compiler.AllocateVReg.
|
||||
func (c *compiler) AllocateVReg(typ ssa.Type) regalloc.VReg {
|
||||
regType := regalloc.RegTypeOf(typ)
|
||||
r := regalloc.VReg(c.nextVRegID).SetRegType(regType)
|
||||
|
||||
id := r.ID()
|
||||
if int(id) >= len(c.ssaTypeOfVRegID) {
|
||||
c.ssaTypeOfVRegID = append(c.ssaTypeOfVRegID, make([]ssa.Type, id+1)...)
|
||||
}
|
||||
c.ssaTypeOfVRegID[id] = typ
|
||||
c.nextVRegID++
|
||||
return r
|
||||
}
|
||||
|
||||
// Init implements Compiler.Init.
|
||||
func (c *compiler) Init() {
|
||||
c.currentGID = 0
|
||||
c.nextVRegID = regalloc.VRegIDNonReservedBegin
|
||||
c.returnVRegs = c.returnVRegs[:0]
|
||||
c.mach.Reset()
|
||||
c.varEdges = c.varEdges[:0]
|
||||
c.constEdges = c.constEdges[:0]
|
||||
c.buf = c.buf[:0]
|
||||
c.sourceOffsets = c.sourceOffsets[:0]
|
||||
c.relocations = c.relocations[:0]
|
||||
}
|
||||
|
||||
// ValueDefinition implements Compiler.ValueDefinition.
|
||||
func (c *compiler) ValueDefinition(value ssa.Value) *SSAValueDefinition {
|
||||
return &c.ssaValueDefinitions[value.ID()]
|
||||
}
|
||||
|
||||
// VRegOf implements Compiler.VRegOf.
|
||||
func (c *compiler) VRegOf(value ssa.Value) regalloc.VReg {
|
||||
return c.ssaValueToVRegs[value.ID()]
|
||||
}
|
||||
|
||||
// Format implements Compiler.Format.
|
||||
func (c *compiler) Format() string {
|
||||
return c.mach.Format()
|
||||
}
|
||||
|
||||
// TypeOf implements Compiler.TypeOf.
|
||||
func (c *compiler) TypeOf(v regalloc.VReg) ssa.Type {
|
||||
return c.ssaTypeOfVRegID[v.ID()]
|
||||
}
|
||||
|
||||
// MatchInstr implements Compiler.MatchInstr.
|
||||
func (c *compiler) MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool {
|
||||
instr := def.Instr
|
||||
return def.IsFromInstr() &&
|
||||
instr.Opcode() == opcode &&
|
||||
instr.GroupID() == c.currentGID &&
|
||||
def.RefCount < 2
|
||||
}
|
||||
|
||||
// MatchInstrOneOf implements Compiler.MatchInstrOneOf.
|
||||
func (c *compiler) MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode {
|
||||
instr := def.Instr
|
||||
if !def.IsFromInstr() {
|
||||
return ssa.OpcodeInvalid
|
||||
}
|
||||
|
||||
if instr.GroupID() != c.currentGID {
|
||||
return ssa.OpcodeInvalid
|
||||
}
|
||||
|
||||
if def.RefCount >= 2 {
|
||||
return ssa.OpcodeInvalid
|
||||
}
|
||||
|
||||
opcode := instr.Opcode()
|
||||
for _, op := range opcodes {
|
||||
if opcode == op {
|
||||
return opcode
|
||||
}
|
||||
}
|
||||
return ssa.OpcodeInvalid
|
||||
}
|
||||
|
||||
// SSABuilder implements Compiler.SSABuilder.
|
||||
func (c *compiler) SSABuilder() ssa.Builder {
|
||||
return c.ssaBuilder
|
||||
}
|
||||
|
||||
// AddSourceOffsetInfo implements Compiler.AddSourceOffsetInfo.
|
||||
func (c *compiler) AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) {
|
||||
c.sourceOffsets = append(c.sourceOffsets, SourceOffsetInfo{
|
||||
SourceOffset: sourceOffset,
|
||||
ExecutableOffset: executableOffset,
|
||||
})
|
||||
}
|
||||
|
||||
// SourceOffsetInfo implements Compiler.SourceOffsetInfo.
|
||||
func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo {
|
||||
return c.sourceOffsets
|
||||
}
|
||||
|
||||
// AddRelocationInfo implements Compiler.AddRelocationInfo.
|
||||
func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) {
|
||||
c.relocations = append(c.relocations, RelocationInfo{
|
||||
Offset: int64(len(c.buf)),
|
||||
FuncRef: funcRef,
|
||||
})
|
||||
}
|
||||
|
||||
// Emit8Bytes implements Compiler.Emit8Bytes.
|
||||
func (c *compiler) Emit8Bytes(b uint64) {
|
||||
c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24), byte(b>>32), byte(b>>40), byte(b>>48), byte(b>>56))
|
||||
}
|
||||
|
||||
// Emit4Bytes implements Compiler.Emit4Bytes.
|
||||
func (c *compiler) Emit4Bytes(b uint32) {
|
||||
c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24))
|
||||
}
|
||||
|
||||
// EmitByte implements Compiler.EmitByte.
|
||||
func (c *compiler) EmitByte(b byte) {
|
||||
c.buf = append(c.buf, b)
|
||||
}
|
||||
|
||||
// Buf implements Compiler.Buf.
|
||||
func (c *compiler) Buf() []byte {
|
||||
return c.buf
|
||||
}
|
||||
|
||||
// BufPtr implements Compiler.BufPtr.
|
||||
func (c *compiler) BufPtr() *[]byte {
|
||||
return &c.buf
|
||||
}
|
||||
|
||||
func (c *compiler) GetFunctionABI(sig *ssa.Signature) *FunctionABI {
|
||||
if int(sig.ID) >= len(c.abis) {
|
||||
c.abis = append(c.abis, make([]FunctionABI, int(sig.ID)+1)...)
|
||||
}
|
||||
|
||||
abi := &c.abis[sig.ID]
|
||||
if abi.Initialized {
|
||||
return abi
|
||||
}
|
||||
|
||||
abi.Init(sig, c.argResultInts, c.argResultFloats)
|
||||
return abi
|
||||
}
|
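Emit4Bytes and Emit8Bytes above append immediates to the code buffer one byte at a time in little-endian order. Below is a standalone sketch of the same idea, cross-checked against encoding/binary; the names are mine and nothing here is part of the wazero API.

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// emit4 appends b to buf in little-endian byte order, the same way
// compiler.Emit4Bytes above does.
func emit4(buf []byte, b uint32) []byte {
	return append(buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24))
}

func main() {
	manual := emit4(nil, 0xDEADBEEF)
	// AppendUint32 (Go 1.19+) produces the same bytes.
	std := binary.LittleEndian.AppendUint32(nil, 0xDEADBEEF)
	fmt.Printf("% x\n% x\n", manual, std) // both print: ef be ad de
}
```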
226 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go (generated, vendored, normal file)
@@ -0,0 +1,226 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
// Lower implements Compiler.Lower.
|
||||
func (c *compiler) Lower() {
|
||||
c.assignVirtualRegisters()
|
||||
c.mach.SetCurrentABI(c.GetFunctionABI(c.ssaBuilder.Signature()))
|
||||
c.mach.ExecutableContext().StartLoweringFunction(c.ssaBuilder.BlockIDMax())
|
||||
c.lowerBlocks()
|
||||
}
|
||||
|
||||
// lowerBlocks lowers each block in the ssa.Builder.
|
||||
func (c *compiler) lowerBlocks() {
|
||||
builder := c.ssaBuilder
|
||||
for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() {
|
||||
c.lowerBlock(blk)
|
||||
}
|
||||
|
||||
ectx := c.mach.ExecutableContext()
|
||||
// After lowering all blocks, we need to link adjacent blocks to layout one single instruction list.
|
||||
var prev ssa.BasicBlock
|
||||
for next := builder.BlockIteratorReversePostOrderBegin(); next != nil; next = builder.BlockIteratorReversePostOrderNext() {
|
||||
if prev != nil {
|
||||
ectx.LinkAdjacentBlocks(prev, next)
|
||||
}
|
||||
prev = next
|
||||
}
|
||||
}
|
||||
|
||||
func (c *compiler) lowerBlock(blk ssa.BasicBlock) {
|
||||
mach := c.mach
|
||||
ectx := mach.ExecutableContext()
|
||||
ectx.StartBlock(blk)
|
||||
|
||||
// We traverse the instructions in reverse order because we might want to lower multiple
|
||||
// instructions together.
|
||||
cur := blk.Tail()
|
||||
|
||||
// First gather the branching instructions at the end of the blocks.
|
||||
var br0, br1 *ssa.Instruction
|
||||
if cur.IsBranching() {
|
||||
br0 = cur
|
||||
cur = cur.Prev()
|
||||
if cur != nil && cur.IsBranching() {
|
||||
br1 = cur
|
||||
cur = cur.Prev()
|
||||
}
|
||||
}
|
||||
|
||||
if br0 != nil {
|
||||
c.lowerBranches(br0, br1)
|
||||
}
|
||||
|
||||
if br1 != nil && br0 == nil {
|
||||
panic("BUG? when a block has conditional branch but doesn't end with an unconditional branch?")
|
||||
}
|
||||
|
||||
// Now start lowering the non-branching instructions.
|
||||
for ; cur != nil; cur = cur.Prev() {
|
||||
c.setCurrentGroupID(cur.GroupID())
|
||||
if cur.Lowered() {
|
||||
continue
|
||||
}
|
||||
|
||||
switch cur.Opcode() {
|
||||
case ssa.OpcodeReturn:
|
||||
rets := cur.ReturnVals()
|
||||
if len(rets) > 0 {
|
||||
c.mach.LowerReturns(rets)
|
||||
}
|
||||
c.mach.InsertReturn()
|
||||
default:
|
||||
mach.LowerInstr(cur)
|
||||
}
|
||||
ectx.FlushPendingInstructions()
|
||||
}
|
||||
|
||||
// Finally, if this is the entry block, we have to insert copies of arguments from the real location to the VReg.
|
||||
if blk.EntryBlock() {
|
||||
c.lowerFunctionArguments(blk)
|
||||
}
|
||||
|
||||
ectx.EndBlock()
|
||||
}
|
||||
|
||||
// lowerBranches is called right after StartBlock and before any LowerInstr call if
|
||||
// there are branches to the given block. br0 is the branch at the very end of the block, and br1, if it exists, is the one immediately before br0.
|
||||
// At least br0 is not nil, but br1 can be nil if there's no branching before br0.
|
||||
//
|
||||
// See ssa.Instruction IsBranching, and the comment on ssa.BasicBlock.
|
||||
func (c *compiler) lowerBranches(br0, br1 *ssa.Instruction) {
|
||||
ectx := c.mach.ExecutableContext()
|
||||
|
||||
c.setCurrentGroupID(br0.GroupID())
|
||||
c.mach.LowerSingleBranch(br0)
|
||||
ectx.FlushPendingInstructions()
|
||||
if br1 != nil {
|
||||
c.setCurrentGroupID(br1.GroupID())
|
||||
c.mach.LowerConditionalBranch(br1)
|
||||
ectx.FlushPendingInstructions()
|
||||
}
|
||||
|
||||
if br0.Opcode() == ssa.OpcodeJump {
|
||||
_, args, target := br0.BranchData()
|
||||
argExists := len(args) != 0
|
||||
if argExists && br1 != nil {
|
||||
panic("BUG: critical edge split failed")
|
||||
}
|
||||
if argExists && target.ReturnBlock() {
|
||||
if len(args) > 0 {
|
||||
c.mach.LowerReturns(args)
|
||||
}
|
||||
} else if argExists {
|
||||
c.lowerBlockArguments(args, target)
|
||||
}
|
||||
}
|
||||
ectx.FlushPendingInstructions()
|
||||
}
|
||||
|
||||
func (c *compiler) lowerFunctionArguments(entry ssa.BasicBlock) {
|
||||
ectx := c.mach.ExecutableContext()
|
||||
|
||||
c.tmpVals = c.tmpVals[:0]
|
||||
for i := 0; i < entry.Params(); i++ {
|
||||
p := entry.Param(i)
|
||||
if c.ssaValueRefCounts[p.ID()] > 0 {
|
||||
c.tmpVals = append(c.tmpVals, p)
|
||||
} else {
|
||||
// If the argument is not used, we can just pass an invalid value.
|
||||
c.tmpVals = append(c.tmpVals, ssa.ValueInvalid)
|
||||
}
|
||||
}
|
||||
c.mach.LowerParams(c.tmpVals)
|
||||
ectx.FlushPendingInstructions()
|
||||
}
|
||||
|
||||
// lowerBlockArguments lowers how to pass arguments to the given successor block.
|
||||
func (c *compiler) lowerBlockArguments(args []ssa.Value, succ ssa.BasicBlock) {
|
||||
if len(args) != succ.Params() {
|
||||
panic("BUG: mismatched number of arguments")
|
||||
}
|
||||
|
||||
c.varEdges = c.varEdges[:0]
|
||||
c.varEdgeTypes = c.varEdgeTypes[:0]
|
||||
c.constEdges = c.constEdges[:0]
|
||||
for i := 0; i < len(args); i++ {
|
||||
dst := succ.Param(i)
|
||||
src := args[i]
|
||||
|
||||
dstReg := c.VRegOf(dst)
|
||||
srcDef := c.ssaValueDefinitions[src.ID()]
|
||||
if srcDef.IsFromInstr() && srcDef.Instr.Constant() {
|
||||
c.constEdges = append(c.constEdges, struct {
|
||||
cInst *ssa.Instruction
|
||||
dst regalloc.VReg
|
||||
}{cInst: srcDef.Instr, dst: dstReg})
|
||||
} else {
|
||||
srcReg := c.VRegOf(src)
|
||||
// Even when src == dst, insert the move so that we can keep such registers alive.
|
||||
c.varEdges = append(c.varEdges, [2]regalloc.VReg{srcReg, dstReg})
|
||||
c.varEdgeTypes = append(c.varEdgeTypes, src.Type())
|
||||
}
|
||||
}
|
||||
|
||||
// Check if there's an overlap among the dsts and srcs in varEdges.
|
||||
c.vRegIDs = c.vRegIDs[:0]
|
||||
for _, edge := range c.varEdges {
|
||||
src := edge[0].ID()
|
||||
if int(src) >= len(c.vRegSet) {
|
||||
c.vRegSet = append(c.vRegSet, make([]bool, src+1)...)
|
||||
}
|
||||
c.vRegSet[src] = true
|
||||
c.vRegIDs = append(c.vRegIDs, src)
|
||||
}
|
||||
separated := true
|
||||
for _, edge := range c.varEdges {
|
||||
dst := edge[1].ID()
|
||||
if int(dst) >= len(c.vRegSet) {
|
||||
c.vRegSet = append(c.vRegSet, make([]bool, dst+1)...)
|
||||
} else {
|
||||
if c.vRegSet[dst] {
|
||||
separated = false
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, id := range c.vRegIDs {
|
||||
c.vRegSet[id] = false // reset for the next use.
|
||||
}
|
||||
|
||||
if separated {
|
||||
// If there's no overlap, we can simply move the source to destination.
|
||||
for i, edge := range c.varEdges {
|
||||
src, dst := edge[0], edge[1]
|
||||
c.mach.InsertMove(dst, src, c.varEdgeTypes[i])
|
||||
}
|
||||
} else {
|
||||
// Otherwise, we allocate temporary registers and route each move through one of them:
|
||||
//
|
||||
// First move all of them to temporary registers.
|
||||
c.tempRegs = c.tempRegs[:0]
|
||||
for i, edge := range c.varEdges {
|
||||
src := edge[0]
|
||||
typ := c.varEdgeTypes[i]
|
||||
temp := c.AllocateVReg(typ)
|
||||
c.tempRegs = append(c.tempRegs, temp)
|
||||
c.mach.InsertMove(temp, src, typ)
|
||||
}
|
||||
// Then move the temporary registers to the destination.
|
||||
for i, edge := range c.varEdges {
|
||||
temp := c.tempRegs[i]
|
||||
dst := edge[1]
|
||||
c.mach.InsertMove(dst, temp, c.varEdgeTypes[i])
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, move the constants.
|
||||
for _, edge := range c.constEdges {
|
||||
cInst, dst := edge.cInst, edge.dst
|
||||
c.mach.InsertLoadConstantBlockArg(cInst, dst)
|
||||
}
|
||||
}
|
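lowerBlockArguments above has to be careful when the destination register of one edge is also the source of another: in that case every move is staged through a freshly allocated temporary so no source is clobbered before it is read. A self-contained sketch of that overlap check and two-phase move, with plain ints standing in for virtual registers (none of these names exist in wazero):

```go
package main

import "fmt"

// moveBlockArgs applies the same "check for overlap, then maybe go through
// temporaries" strategy as compiler.lowerBlockArguments above.
// regs maps a register ID to its current value; edges are {src, dst} pairs.
func moveBlockArgs(regs map[int]int, edges [][2]int, newTemp func() int) {
	srcs := map[int]bool{}
	for _, e := range edges {
		srcs[e[0]] = true
	}
	overlap := false
	for _, e := range edges {
		if srcs[e[1]] { // a destination is also used as a source
			overlap = true
			break
		}
	}
	if !overlap {
		for _, e := range edges {
			regs[e[1]] = regs[e[0]]
		}
		return
	}
	// Stage every source value in a temporary first, then copy to destinations.
	temps := make([]int, len(edges))
	for i, e := range edges {
		temps[i] = newTemp()
		regs[temps[i]] = regs[e[0]]
	}
	for i, e := range edges {
		regs[e[1]] = regs[temps[i]]
	}
}

func main() {
	regs := map[int]int{1: 10, 2: 20}
	next := 100
	newTemp := func() int { next++; return next }
	// Swap-like case: r1->r2 and r2->r1 overlap, so temporaries are needed.
	moveBlockArgs(regs, [][2]int{{1, 2}, {2, 1}}, newTemp)
	fmt.Println(regs[1], regs[2]) // 20 10
}
```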
219 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go (generated, vendored, normal file)
@@ -0,0 +1,219 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
type ExecutableContext interface {
|
||||
// StartLoweringFunction is called when the lowering of the given function is started.
|
||||
// maximumBlockID is the maximum value of ssa.BasicBlockID existing in the function.
|
||||
StartLoweringFunction(maximumBlockID ssa.BasicBlockID)
|
||||
|
||||
// LinkAdjacentBlocks is called after finished lowering all blocks in order to create one single instruction list.
|
||||
LinkAdjacentBlocks(prev, next ssa.BasicBlock)
|
||||
|
||||
// StartBlock is called when the compilation of the given block is started.
|
||||
// The order of this being called is the reverse post order of the ssa.BasicBlock(s) as we iterate with
|
||||
// ssa.Builder BlockIteratorReversePostOrderBegin and BlockIteratorReversePostOrderEnd.
|
||||
StartBlock(ssa.BasicBlock)
|
||||
|
||||
// EndBlock is called when the compilation of the current block is finished.
|
||||
EndBlock()
|
||||
|
||||
// FlushPendingInstructions flushes the pending instructions to the buffer.
|
||||
// This will be called after the lowering of each SSA Instruction.
|
||||
FlushPendingInstructions()
|
||||
}
|
||||
|
||||
type ExecutableContextT[Instr any] struct {
|
||||
CurrentSSABlk ssa.BasicBlock
|
||||
|
||||
// InstructionPool is the pool of instructions.
|
||||
InstructionPool wazevoapi.Pool[Instr]
|
||||
asNop func(*Instr)
|
||||
setNext func(*Instr, *Instr)
|
||||
setPrev func(*Instr, *Instr)
|
||||
|
||||
// RootInstr is the root instruction of the executable.
|
||||
RootInstr *Instr
|
||||
labelPositionPool wazevoapi.Pool[LabelPosition[Instr]]
|
||||
NextLabel Label
|
||||
// LabelPositions maps a label to the instructions of the region which the label represents.
|
||||
LabelPositions map[Label]*LabelPosition[Instr]
|
||||
OrderedBlockLabels []*LabelPosition[Instr]
|
||||
|
||||
// PerBlockHead and PerBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock.
|
||||
PerBlockHead, PerBlockEnd *Instr
|
||||
// PendingInstructions are the instructions which are not yet emitted into the instruction list.
|
||||
PendingInstructions []*Instr
|
||||
|
||||
// SsaBlockIDToLabels maps an SSA block ID to the label.
|
||||
SsaBlockIDToLabels []Label
|
||||
}
|
||||
|
||||
func NewExecutableContextT[Instr any](
|
||||
resetInstruction func(*Instr),
|
||||
setNext func(*Instr, *Instr),
|
||||
setPrev func(*Instr, *Instr),
|
||||
asNop func(*Instr),
|
||||
) *ExecutableContextT[Instr] {
|
||||
return &ExecutableContextT[Instr]{
|
||||
InstructionPool: wazevoapi.NewPool[Instr](resetInstruction),
|
||||
asNop: asNop,
|
||||
setNext: setNext,
|
||||
setPrev: setPrev,
|
||||
labelPositionPool: wazevoapi.NewPool[LabelPosition[Instr]](resetLabelPosition[Instr]),
|
||||
LabelPositions: make(map[Label]*LabelPosition[Instr]),
|
||||
NextLabel: LabelInvalid,
|
||||
}
|
||||
}
|
||||
|
||||
func resetLabelPosition[T any](l *LabelPosition[T]) {
|
||||
*l = LabelPosition[T]{}
|
||||
}
|
||||
|
||||
// StartLoweringFunction implements ExecutableContext.
|
||||
func (e *ExecutableContextT[Instr]) StartLoweringFunction(max ssa.BasicBlockID) {
|
||||
imax := int(max)
|
||||
if len(e.SsaBlockIDToLabels) <= imax {
|
||||
// Eagerly allocate labels for the blocks since the underlying slice will be used for the next iteration.
|
||||
e.SsaBlockIDToLabels = append(e.SsaBlockIDToLabels, make([]Label, imax+1)...)
|
||||
}
|
||||
}
|
||||
|
||||
func (e *ExecutableContextT[Instr]) StartBlock(blk ssa.BasicBlock) {
|
||||
e.CurrentSSABlk = blk
|
||||
|
||||
l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()]
|
||||
if l == LabelInvalid {
|
||||
l = e.AllocateLabel()
|
||||
e.SsaBlockIDToLabels[blk.ID()] = l
|
||||
}
|
||||
|
||||
end := e.allocateNop0()
|
||||
e.PerBlockHead, e.PerBlockEnd = end, end
|
||||
|
||||
labelPos, ok := e.LabelPositions[l]
|
||||
if !ok {
|
||||
labelPos = e.AllocateLabelPosition(l)
|
||||
e.LabelPositions[l] = labelPos
|
||||
}
|
||||
e.OrderedBlockLabels = append(e.OrderedBlockLabels, labelPos)
|
||||
labelPos.Begin, labelPos.End = end, end
|
||||
labelPos.SB = blk
|
||||
}
|
||||
|
||||
// EndBlock implements ExecutableContext.
|
||||
func (e *ExecutableContextT[T]) EndBlock() {
|
||||
// Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions.
|
||||
e.insertAtPerBlockHead(e.allocateNop0())
|
||||
|
||||
l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()]
|
||||
e.LabelPositions[l].Begin = e.PerBlockHead
|
||||
|
||||
if e.CurrentSSABlk.EntryBlock() {
|
||||
e.RootInstr = e.PerBlockHead
|
||||
}
|
||||
}
|
||||
|
||||
func (e *ExecutableContextT[T]) insertAtPerBlockHead(i *T) {
|
||||
if e.PerBlockHead == nil {
|
||||
e.PerBlockHead = i
|
||||
e.PerBlockEnd = i
|
||||
return
|
||||
}
|
||||
e.setNext(i, e.PerBlockHead)
|
||||
e.setPrev(e.PerBlockHead, i)
|
||||
e.PerBlockHead = i
|
||||
}
|
||||
|
||||
// FlushPendingInstructions implements ExecutableContext.
|
||||
func (e *ExecutableContextT[T]) FlushPendingInstructions() {
|
||||
l := len(e.PendingInstructions)
|
||||
if l == 0 {
|
||||
return
|
||||
}
|
||||
for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order.
|
||||
e.insertAtPerBlockHead(e.PendingInstructions[i])
|
||||
}
|
||||
e.PendingInstructions = e.PendingInstructions[:0]
|
||||
}
|
||||
|
||||
func (e *ExecutableContextT[T]) Reset() {
|
||||
e.labelPositionPool.Reset()
|
||||
e.InstructionPool.Reset()
|
||||
for l := Label(0); l <= e.NextLabel; l++ {
|
||||
delete(e.LabelPositions, l)
|
||||
}
|
||||
e.PendingInstructions = e.PendingInstructions[:0]
|
||||
e.OrderedBlockLabels = e.OrderedBlockLabels[:0]
|
||||
e.RootInstr = nil
|
||||
e.SsaBlockIDToLabels = e.SsaBlockIDToLabels[:0]
|
||||
e.PerBlockHead, e.PerBlockEnd = nil, nil
|
||||
e.NextLabel = LabelInvalid
|
||||
}
|
||||
|
||||
// AllocateLabel allocates an unused label.
|
||||
func (e *ExecutableContextT[T]) AllocateLabel() Label {
|
||||
e.NextLabel++
|
||||
return e.NextLabel
|
||||
}
|
||||
|
||||
func (e *ExecutableContextT[T]) AllocateLabelPosition(la Label) *LabelPosition[T] {
|
||||
l := e.labelPositionPool.Allocate()
|
||||
l.L = la
|
||||
return l
|
||||
}
|
||||
|
||||
func (e *ExecutableContextT[T]) GetOrAllocateSSABlockLabel(blk ssa.BasicBlock) Label {
|
||||
if blk.ReturnBlock() {
|
||||
return LabelReturn
|
||||
}
|
||||
l := e.SsaBlockIDToLabels[blk.ID()]
|
||||
if l == LabelInvalid {
|
||||
l = e.AllocateLabel()
|
||||
e.SsaBlockIDToLabels[blk.ID()] = l
|
||||
}
|
||||
return l
|
||||
}
|
||||
|
||||
func (e *ExecutableContextT[T]) allocateNop0() *T {
|
||||
i := e.InstructionPool.Allocate()
|
||||
e.asNop(i)
|
||||
return i
|
||||
}
|
||||
|
||||
// LinkAdjacentBlocks implements backend.Machine.
|
||||
func (e *ExecutableContextT[T]) LinkAdjacentBlocks(prev, next ssa.BasicBlock) {
|
||||
prevLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(prev)]
|
||||
nextLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(next)]
|
||||
e.setNext(prevLabelPos.End, nextLabelPos.Begin)
|
||||
}
|
||||
|
||||
// LabelPosition represents the regions of the generated code which the label represents.
|
||||
type LabelPosition[Instr any] struct {
|
||||
SB ssa.BasicBlock
|
||||
L Label
|
||||
Begin, End *Instr
|
||||
BinaryOffset int64
|
||||
}
|
||||
|
||||
// Label represents a position in the generated code which is either
|
||||
// a real instruction or the constant InstructionPool (e.g. jump tables).
|
||||
//
|
||||
// This is exactly the same as the traditional "label" in assembly code.
|
||||
type Label uint32
|
||||
|
||||
const (
|
||||
LabelInvalid Label = 0
|
||||
LabelReturn Label = math.MaxUint32
|
||||
)
|
||||
|
||||
// String implements backend.Machine.
|
||||
func (l Label) String() string {
|
||||
return fmt.Sprintf("L%d", l)
|
||||
}
|
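Because instructions are lowered in reverse within a block, ExecutableContextT flushes its pending instructions back-to-front and prepends each one at the per-block head, which restores program order. A minimal standalone illustration of that prepend-and-flush pattern; the instr type here is a stand-in, not the real machine-specific Instr.

```go
package main

import "fmt"

// instr is a stand-in for the machine-specific Instr type used by ExecutableContextT.
type instr struct {
	name       string
	prev, next *instr
}

// insertAtHead prepends i to the per-block list, mirroring
// ExecutableContextT.insertAtPerBlockHead above.
func insertAtHead(head, end *instr, i *instr) (*instr, *instr) {
	if head == nil {
		return i, i
	}
	i.next = head
	head.prev = i
	return i, end
}

func main() {
	// Pending instructions were produced in lowering order; flushing them in
	// reverse and prepending each one keeps that order at the front of the block.
	pending := []*instr{{name: "a"}, {name: "b"}, {name: "c"}}
	var head, end *instr
	head, end = insertAtHead(head, end, &instr{name: "nop0 (block end)"})
	for i := len(pending) - 1; i >= 0; i-- {
		head, end = insertAtHead(head, end, pending[i])
	}
	for cur := head; cur != nil; cur = cur.next {
		fmt.Println(cur.name) // a, b, c, nop0 (block end)
	}
	_ = end
}
```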
33 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/go_call.go (generated, vendored, normal file)
@@ -0,0 +1,33 @@
package backend

import "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"

// GoFunctionCallRequiredStackSize returns the size of the stack required for the Go function call.
// argBegin is the index of the first argument in the signature which is not either execution context or module context.
func GoFunctionCallRequiredStackSize(sig *ssa.Signature, argBegin int) (ret, retUnaligned int64) {
	var paramNeededInBytes, resultNeededInBytes int64
	for _, p := range sig.Params[argBegin:] {
		s := int64(p.Size())
		if s < 8 {
			s = 8 // We use uint64 for all basic types, except SIMD v128.
		}
		paramNeededInBytes += s
	}
	for _, r := range sig.Results {
		s := int64(r.Size())
		if s < 8 {
			s = 8 // We use uint64 for all basic types, except SIMD v128.
		}
		resultNeededInBytes += s
	}

	if paramNeededInBytes > resultNeededInBytes {
		ret = paramNeededInBytes
	} else {
		ret = resultNeededInBytes
	}
	retUnaligned = ret
	// Align to 16 bytes.
	ret = (ret + 15) &^ 15
	return
}
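GoFunctionCallRequiredStackSize rounds the required byte count up to a 16-byte boundary with `(ret + 15) &^ 15`, the same trick used by AlignedArgResultStackSlotSize earlier. A tiny demonstration of that bit manipulation, as a standalone sketch:

```go
package main

import "fmt"

// align16 rounds n up to the next multiple of 16 using (n + 15) &^ 15:
// adding 15 pushes any partial slot past the boundary, and &^ 15 clears the low bits.
func align16(n int64) int64 { return (n + 15) &^ 15 }

func main() {
	for _, n := range []int64{0, 1, 8, 16, 17, 24} {
		fmt.Println(n, "->", align16(n)) // 0, 16, 16, 16, 32, 32
	}
}
```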
186 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi.go (generated, vendored, normal file)
@@ -0,0 +1,186 @@
|
||||
package amd64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
// For the details of the ABI, see:
|
||||
// https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#amd64-architecture
|
||||
|
||||
var (
|
||||
intArgResultRegs = []regalloc.RealReg{rax, rbx, rcx, rdi, rsi, r8, r9, r10, r11}
|
||||
floatArgResultRegs = []regalloc.RealReg{xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7}
|
||||
)
|
||||
|
||||
var regInfo = ®alloc.RegisterInfo{
|
||||
AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
|
||||
regalloc.RegTypeInt: {
|
||||
rax, rcx, rdx, rbx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15,
|
||||
},
|
||||
regalloc.RegTypeFloat: {
|
||||
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
|
||||
},
|
||||
},
|
||||
CalleeSavedRegisters: regalloc.NewRegSet(
|
||||
rdx, r12, r13, r14, r15,
|
||||
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
|
||||
),
|
||||
CallerSavedRegisters: regalloc.NewRegSet(
|
||||
rax, rcx, rbx, rsi, rdi, r8, r9, r10, r11,
|
||||
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
|
||||
),
|
||||
RealRegToVReg: []regalloc.VReg{
|
||||
rax: raxVReg, rcx: rcxVReg, rdx: rdxVReg, rbx: rbxVReg, rsp: rspVReg, rbp: rbpVReg, rsi: rsiVReg, rdi: rdiVReg,
|
||||
r8: r8VReg, r9: r9VReg, r10: r10VReg, r11: r11VReg, r12: r12VReg, r13: r13VReg, r14: r14VReg, r15: r15VReg,
|
||||
xmm0: xmm0VReg, xmm1: xmm1VReg, xmm2: xmm2VReg, xmm3: xmm3VReg, xmm4: xmm4VReg, xmm5: xmm5VReg, xmm6: xmm6VReg,
|
||||
xmm7: xmm7VReg, xmm8: xmm8VReg, xmm9: xmm9VReg, xmm10: xmm10VReg, xmm11: xmm11VReg, xmm12: xmm12VReg,
|
||||
xmm13: xmm13VReg, xmm14: xmm14VReg, xmm15: xmm15VReg,
|
||||
},
|
||||
RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
|
||||
RealRegType: func(r regalloc.RealReg) regalloc.RegType {
|
||||
if r < xmm0 {
|
||||
return regalloc.RegTypeInt
|
||||
}
|
||||
return regalloc.RegTypeFloat
|
||||
},
|
||||
}
|
||||
|
||||
// ArgsResultsRegs implements backend.Machine.
|
||||
func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) {
|
||||
return intArgResultRegs, floatArgResultRegs
|
||||
}
|
||||
|
||||
// LowerParams implements backend.Machine.
|
||||
func (m *machine) LowerParams(args []ssa.Value) {
|
||||
a := m.currentABI
|
||||
|
||||
for i, ssaArg := range args {
|
||||
if !ssaArg.Valid() {
|
||||
continue
|
||||
}
|
||||
reg := m.c.VRegOf(ssaArg)
|
||||
arg := &a.Args[i]
|
||||
if arg.Kind == backend.ABIArgKindReg {
|
||||
m.InsertMove(reg, arg.Reg, arg.Type)
|
||||
} else {
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | ReturnAddress |
|
||||
// | Caller_RBP |
|
||||
// +-----------------+ <-- RBP
|
||||
// | ........... |
|
||||
// | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 0 |
|
||||
// | spill slot N |
|
||||
// | ........... |
|
||||
// | spill slot 0 |
|
||||
// RSP--> +-----------------+
|
||||
// (low address)
|
||||
|
||||
// Load the value from the arg stack slot above the current RBP.
|
||||
load := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmRBPReg(uint32(arg.Offset + 16)))
|
||||
switch arg.Type {
|
||||
case ssa.TypeI32:
|
||||
load.asMovzxRmR(extModeLQ, mem, reg)
|
||||
case ssa.TypeI64:
|
||||
load.asMov64MR(mem, reg)
|
||||
case ssa.TypeF32:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovss, mem, reg)
|
||||
case ssa.TypeF64:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovsd, mem, reg)
|
||||
case ssa.TypeV128:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, reg)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
m.insert(load)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// LowerReturns implements backend.Machine.
|
||||
func (m *machine) LowerReturns(rets []ssa.Value) {
|
||||
// Load the XMM registers first as it might need a temporary register to inline
|
||||
// constant return.
|
||||
a := m.currentABI
|
||||
for i, ret := range rets {
|
||||
r := &a.Rets[i]
|
||||
if !r.Type.IsInt() {
|
||||
m.LowerReturn(ret, r)
|
||||
}
|
||||
}
|
||||
// Then load the GPR registers.
|
||||
for i, ret := range rets {
|
||||
r := &a.Rets[i]
|
||||
if r.Type.IsInt() {
|
||||
m.LowerReturn(ret, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) LowerReturn(ret ssa.Value, r *backend.ABIArg) {
|
||||
reg := m.c.VRegOf(ret)
|
||||
if def := m.c.ValueDefinition(ret); def.IsFromInstr() {
|
||||
// Constant instructions are inlined.
|
||||
if inst := def.Instr; inst.Constant() {
|
||||
m.insertLoadConstant(inst, reg)
|
||||
}
|
||||
}
|
||||
if r.Kind == backend.ABIArgKindReg {
|
||||
m.InsertMove(r.Reg, reg, ret.Type())
|
||||
} else {
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | ReturnAddress |
|
||||
// | Caller_RBP |
|
||||
// +-----------------+ <-- RBP
|
||||
// | ........... |
|
||||
// | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 0 |
|
||||
// | spill slot N |
|
||||
// | ........... |
|
||||
// | spill slot 0 |
|
||||
// RSP--> +-----------------+
|
||||
// (low address)
|
||||
|
||||
// Store the value to the return stack slot above the current RBP.
|
||||
store := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmRBPReg(uint32(m.currentABI.ArgStackSize + 16 + r.Offset)))
|
||||
switch r.Type {
|
||||
case ssa.TypeI32:
|
||||
store.asMovRM(reg, mem, 4)
|
||||
case ssa.TypeI64:
|
||||
store.asMovRM(reg, mem, 8)
|
||||
case ssa.TypeF32:
|
||||
store.asXmmMovRM(sseOpcodeMovss, reg, mem)
|
||||
case ssa.TypeF64:
|
||||
store.asXmmMovRM(sseOpcodeMovsd, reg, mem)
|
||||
case ssa.TypeV128:
|
||||
store.asXmmMovRM(sseOpcodeMovdqu, reg, mem)
|
||||
}
|
||||
m.insert(store)
|
||||
}
|
||||
}
|
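The stack diagrams in LowerParams and LowerReturn above boil down to two offset computations: stack-passed arguments are read at RBP + 16 + offset (skipping the saved caller RBP and the return address, 8 bytes each), and stack-passed results are written above the whole argument area. A small sketch of just that arithmetic; the helper names are mine, not wazero's.

```go
package main

import "fmt"

// argLoadOffset mirrors the address computation in LowerParams above:
// RBP + 16 + the argument's offset within the arg area.
func argLoadOffset(argOffset int64) int64 { return argOffset + 16 }

// retStoreOffset mirrors LowerReturn above: the return area sits above the
// argument area, so the store goes to RBP + argStackSize + 16 + the return offset.
func retStoreOffset(argStackSize, retOffset int64) int64 { return argStackSize + 16 + retOffset }

func main() {
	fmt.Println(argLoadOffset(0), argLoadOffset(8))            // 16 24
	fmt.Println(retStoreOffset(16, 0), retStoreOffset(16, 8))  // 32 40
}
```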
9 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.go (generated, vendored, normal file)
@@ -0,0 +1,9 @@
package amd64

// entrypoint enters the machine code generated by this backend which begins with the preamble generated by functionABI.EmitGoEntryPreamble below.
// This implements wazevo.entrypoint, and see the comments there for detail.
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr)

// afterGoFunctionCallEntrypoint enters the machine code after growing the stack.
// This implements wazevo.afterGoFunctionCallEntrypoint, and see the comments there for detail.
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)
29 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.s (generated, vendored, normal file)
@@ -0,0 +1,29 @@
#include "funcdata.h"
#include "textflag.h"

// entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr
TEXT ·entrypoint(SB), NOSPLIT|NOFRAME, $0-48
	MOVQ preambleExecutable+0(FP), R11
	MOVQ functionExectuable+8(FP), R14
	MOVQ executionContextPtr+16(FP), AX // First argument is passed in AX.
	MOVQ moduleContextPtr+24(FP), BX // Second argument is passed in BX.
	MOVQ paramResultSlicePtr+32(FP), R12
	MOVQ goAllocatedStackSlicePtr+40(FP), R13
	JMP R11

// afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)
TEXT ·afterGoFunctionCallEntrypoint(SB), NOSPLIT|NOFRAME, $0-32
	MOVQ executable+0(FP), CX
	MOVQ executionContextPtr+8(FP), AX // First argument is passed in AX.

	// Save the stack pointer and frame pointer.
	MOVQ BP, 16(AX) // 16 == ExecutionContextOffsetOriginalFramePointer
	MOVQ SP, 24(AX) // 24 == ExecutionContextOffsetOriginalStackPointer

	// Then set the stack pointer and frame pointer to the values we got from the Go runtime.
	MOVQ framePointer+24(FP), BP

	// WARNING: do not update SP before BP, because the Go translates (FP) as (SP) + 8.
	MOVQ stackPointer+16(FP), SP

	JMP CX
248 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble.go (generated, vendored, normal file)
@@ -0,0 +1,248 @@
|
||||
package amd64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
var (
|
||||
executionContextPtrReg = raxVReg
|
||||
|
||||
// The following are callee-saved registers. They can be used freely in the entry preamble
|
||||
// since the preamble is called via Go assembly function which has stack-based ABI.
|
||||
|
||||
// savedExecutionContextPtr also must be a callee-saved reg so that they can be used in the prologue and epilogue.
|
||||
savedExecutionContextPtr = rdxVReg
|
||||
// paramResultSlicePtr must match with entrypoint function in abi_entry_amd64.s.
|
||||
paramResultSlicePtr = r12VReg
|
||||
// goAllocatedStackPtr must match with entrypoint function in abi_entry_amd64.s.
|
||||
goAllocatedStackPtr = r13VReg
|
||||
// functionExecutable must match with entrypoint function in abi_entry_amd64.s.
|
||||
functionExecutable = r14VReg
|
||||
tmpIntReg = r15VReg
|
||||
tmpXmmReg = xmm15VReg
|
||||
)
|
||||
|
||||
// CompileEntryPreamble implements backend.Machine.
|
||||
func (m *machine) CompileEntryPreamble(sig *ssa.Signature) []byte {
|
||||
root := m.compileEntryPreamble(sig)
|
||||
m.encodeWithoutSSA(root)
|
||||
buf := m.c.Buf()
|
||||
return buf
|
||||
}
|
||||
|
||||
func (m *machine) compileEntryPreamble(sig *ssa.Signature) *instruction {
|
||||
abi := backend.FunctionABI{}
|
||||
abi.Init(sig, intArgResultRegs, floatArgResultRegs)
|
||||
|
||||
root := m.allocateNop()
|
||||
|
||||
//// ----------------------------------- prologue ----------------------------------- ////
|
||||
|
||||
// First, we save executionContextPtrReg into a callee-saved register so that it can be used in epilogue as well.
|
||||
// mov %executionContextPtrReg, %savedExecutionContextPtr
|
||||
cur := m.move64(executionContextPtrReg, savedExecutionContextPtr, root)
|
||||
|
||||
// Next is to save the original RBP and RSP into the execution context.
|
||||
cur = m.saveOriginalRSPRBP(cur)
|
||||
|
||||
// Now set the RSP to the Go-allocated stack pointer.
|
||||
// mov %goAllocatedStackPtr, %rsp
|
||||
cur = m.move64(goAllocatedStackPtr, rspVReg, cur)
|
||||
|
||||
if stackSlotSize := abi.AlignedArgResultStackSlotSize(); stackSlotSize > 0 {
|
||||
// Allocate stack slots for the arguments and return values.
|
||||
// sub $stackSlotSize, %rsp
|
||||
spDec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(stackSlotSize)), rspVReg, true)
|
||||
cur = linkInstr(cur, spDec)
|
||||
}
|
||||
|
||||
var offset uint32
|
||||
for i := range abi.Args {
|
||||
if i < 2 {
|
||||
// module context ptr and execution context ptr are passed in rax and rbx by the Go assembly function.
|
||||
continue
|
||||
}
|
||||
arg := &abi.Args[i]
|
||||
cur = m.goEntryPreamblePassArg(cur, paramResultSlicePtr, offset, arg)
|
||||
if arg.Type == ssa.TypeV128 {
|
||||
offset += 16
|
||||
} else {
|
||||
offset += 8
|
||||
}
|
||||
}
|
||||
|
||||
// Zero out RBP so that the unwind/stack growth code can correctly detect the end of the stack.
|
||||
zerosRbp := m.allocateInstr().asAluRmiR(aluRmiROpcodeXor, newOperandReg(rbpVReg), rbpVReg, true)
|
||||
cur = linkInstr(cur, zerosRbp)
|
||||
|
||||
// Now ready to call the real function. Note that at this point stack pointer is already set to the Go-allocated,
|
||||
// which is aligned to 16 bytes.
|
||||
call := m.allocateInstr().asCallIndirect(newOperandReg(functionExecutable), &abi)
|
||||
cur = linkInstr(cur, call)
|
||||
|
||||
//// ----------------------------------- epilogue ----------------------------------- ////
|
||||
|
||||
// Read the results from regs and the stack, and set them correctly into the paramResultSlicePtr.
|
||||
offset = 0
|
||||
for i := range abi.Rets {
|
||||
r := &abi.Rets[i]
|
||||
cur = m.goEntryPreamblePassResult(cur, paramResultSlicePtr, offset, r, uint32(abi.ArgStackSize))
|
||||
if r.Type == ssa.TypeV128 {
|
||||
offset += 16
|
||||
} else {
|
||||
offset += 8
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, restore the original RBP and RSP.
|
||||
cur = m.restoreOriginalRSPRBP(cur)
|
||||
|
||||
ret := m.allocateInstr().asRet()
|
||||
linkInstr(cur, ret)
|
||||
return root
|
||||
}
|
||||
|
||||
// saveOriginalRSPRBP saves the original RSP and RBP into the execution context.
|
||||
func (m *machine) saveOriginalRSPRBP(cur *instruction) *instruction {
|
||||
// mov %rbp, wazevoapi.ExecutionContextOffsetOriginalFramePointer(%executionContextPtrReg)
|
||||
// mov %rsp, wazevoapi.ExecutionContextOffsetOriginalStackPointer(%executionContextPtrReg)
|
||||
cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, true, cur)
|
||||
cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, true, cur)
|
||||
return cur
|
||||
}
|
||||
|
||||
// restoreOriginalRSPRBP restores the original RSP and RBP from the execution context.
|
||||
func (m *machine) restoreOriginalRSPRBP(cur *instruction) *instruction {
|
||||
// mov wazevoapi.ExecutionContextOffsetOriginalFramePointer(%executionContextPtrReg), %rbp
|
||||
// mov wazevoapi.ExecutionContextOffsetOriginalStackPointer(%executionContextPtrReg), %rsp
|
||||
cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, false, cur)
|
||||
cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, false, cur)
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) move64(src, dst regalloc.VReg, prev *instruction) *instruction {
|
||||
mov := m.allocateInstr().asMovRR(src, dst, true)
|
||||
return linkInstr(prev, mov)
|
||||
}
|
||||
|
||||
func (m *machine) loadOrStore64AtExecutionCtx(execCtx regalloc.VReg, offset wazevoapi.Offset, r regalloc.VReg, store bool, prev *instruction) *instruction {
|
||||
mem := newOperandMem(m.newAmodeImmReg(offset.U32(), execCtx))
|
||||
instr := m.allocateInstr()
|
||||
if store {
|
||||
instr.asMovRM(r, mem, 8)
|
||||
} else {
|
||||
instr.asMov64MR(mem, r)
|
||||
}
|
||||
return linkInstr(prev, instr)
|
||||
}
|
||||
|
||||
// This is for debugging.
|
||||
func (m *machine) linkUD2(cur *instruction) *instruction { //nolint
|
||||
return linkInstr(cur, m.allocateInstr().asUD2())
|
||||
}
|
||||
|
||||
func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regalloc.VReg, offsetInParamSlice uint32, arg *backend.ABIArg) *instruction {
|
||||
var dst regalloc.VReg
|
||||
argTyp := arg.Type
|
||||
if arg.Kind == backend.ABIArgKindStack {
|
||||
// Caller-saved scratch registers can be used to stage stack-passed arguments.
|
||||
switch argTyp {
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
dst = tmpIntReg
|
||||
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
|
||||
dst = tmpXmmReg
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
} else {
|
||||
dst = arg.Reg
|
||||
}
|
||||
|
||||
load := m.allocateInstr()
|
||||
a := newOperandMem(m.newAmodeImmReg(offsetInParamSlice, paramSlicePtr))
|
||||
switch arg.Type {
|
||||
case ssa.TypeI32:
|
||||
load.asMovzxRmR(extModeLQ, a, dst)
|
||||
case ssa.TypeI64:
|
||||
load.asMov64MR(a, dst)
|
||||
case ssa.TypeF32:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovss, a, dst)
|
||||
case ssa.TypeF64:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovsd, a, dst)
|
||||
case ssa.TypeV128:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovdqu, a, dst)
|
||||
}
|
||||
|
||||
cur = linkInstr(cur, load)
|
||||
if arg.Kind == backend.ABIArgKindStack {
|
||||
// Store back to the stack.
|
||||
store := m.allocateInstr()
|
||||
a := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset), rspVReg))
|
||||
switch arg.Type {
|
||||
case ssa.TypeI32:
|
||||
store.asMovRM(dst, a, 4)
|
||||
case ssa.TypeI64:
|
||||
store.asMovRM(dst, a, 8)
|
||||
case ssa.TypeF32:
|
||||
store.asXmmMovRM(sseOpcodeMovss, dst, a)
|
||||
case ssa.TypeF64:
|
||||
store.asXmmMovRM(sseOpcodeMovsd, dst, a)
|
||||
case ssa.TypeV128:
|
||||
store.asXmmMovRM(sseOpcodeMovdqu, dst, a)
|
||||
}
|
||||
cur = linkInstr(cur, store)
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr regalloc.VReg, offsetInResultSlice uint32, result *backend.ABIArg, resultStackSlotBeginOffset uint32) *instruction {
|
||||
var r regalloc.VReg
|
||||
if result.Kind == backend.ABIArgKindStack {
|
||||
// Load the value to the temporary.
|
||||
load := m.allocateInstr()
|
||||
offset := resultStackSlotBeginOffset + uint32(result.Offset)
|
||||
a := newOperandMem(m.newAmodeImmReg(offset, rspVReg))
|
||||
switch result.Type {
|
||||
case ssa.TypeI32:
|
||||
r = tmpIntReg
|
||||
load.asMovzxRmR(extModeLQ, a, r)
|
||||
case ssa.TypeI64:
|
||||
r = tmpIntReg
|
||||
load.asMov64MR(a, r)
|
||||
case ssa.TypeF32:
|
||||
r = tmpXmmReg
|
||||
load.asXmmUnaryRmR(sseOpcodeMovss, a, r)
|
||||
case ssa.TypeF64:
|
||||
r = tmpXmmReg
|
||||
load.asXmmUnaryRmR(sseOpcodeMovsd, a, r)
|
||||
case ssa.TypeV128:
|
||||
r = tmpXmmReg
|
||||
load.asXmmUnaryRmR(sseOpcodeMovdqu, a, r)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
cur = linkInstr(cur, load)
|
||||
} else {
|
||||
r = result.Reg
|
||||
}
|
||||
|
||||
store := m.allocateInstr()
|
||||
a := newOperandMem(m.newAmodeImmReg(offsetInResultSlice, resultSlicePtr))
|
||||
switch result.Type {
|
||||
case ssa.TypeI32:
|
||||
store.asMovRM(r, a, 4)
|
||||
case ssa.TypeI64:
|
||||
store.asMovRM(r, a, 8)
|
||||
case ssa.TypeF32:
|
||||
store.asXmmMovRM(sseOpcodeMovss, r, a)
|
||||
case ssa.TypeF64:
|
||||
store.asXmmMovRM(sseOpcodeMovsd, r, a)
|
||||
case ssa.TypeV128:
|
||||
store.asXmmMovRM(sseOpcodeMovdqu, r, a)
|
||||
}
|
||||
|
||||
return linkInstr(cur, store)
|
||||
}
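// goEntryPreamblePassArg and goEntryPreamblePassResult above move values between
// machine locations and a flat Go-side buffer in which each scalar occupies one
// 8-byte slot (a V128 value occupies two). A standalone sketch of that slot
// convention, with a hypothetical helper name and assuming only the standard
// library "math" package:
func putSlot(slots []uint64, i int, v interface{}) {
	switch x := v.(type) {
	case uint32:
		slots[i] = uint64(x) // i32: zero-extended into the slot
	case uint64:
		slots[i] = x // i64 uses the slot as-is
	case float32:
		slots[i] = uint64(math.Float32bits(x)) // f32 lives in the low 4 bytes (cf. movss)
	case float64:
		slots[i] = math.Float64bits(x) // f64 fills the slot (cf. movsd)
	}
}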
|
443
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go
generated
vendored
Normal file
@ -0,0 +1,443 @@
|
||||
package amd64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
var calleeSavedVRegs = []regalloc.VReg{
|
||||
rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
|
||||
xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
|
||||
}
|
||||
|
||||
// CompileGoFunctionTrampoline implements backend.Machine.
|
||||
func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte {
|
||||
ectx := m.ectx
|
||||
argBegin := 1 // Skips exec context by default.
|
||||
if needModuleContextPtr {
|
||||
argBegin++
|
||||
}
|
||||
|
||||
abi := &backend.FunctionABI{}
|
||||
abi.Init(sig, intArgResultRegs, floatArgResultRegs)
|
||||
m.currentABI = abi
|
||||
|
||||
cur := m.allocateNop()
|
||||
ectx.RootInstr = cur
|
||||
|
||||
// Execution context is always the first argument.
|
||||
execCtrPtr := raxVReg
|
||||
|
||||
// First we update RBP and RSP just like the normal prologue.
|
||||
//
|
||||
// (high address) (high address)
|
||||
// RBP ----> +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | ====> | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | Return Addr | | Return Addr |
|
||||
// RSP ----> +-----------------+ | Caller_RBP |
|
||||
// (low address) +-----------------+ <----- RSP, RBP
|
||||
//
|
||||
cur = m.setupRBPRSP(cur)
|
||||
|
||||
goSliceSizeAligned, goSliceSizeAlignedUnaligned := backend.GoFunctionCallRequiredStackSize(sig, argBegin)
|
||||
cur = m.insertStackBoundsCheck(goSliceSizeAligned+8 /* size of the Go slice */, cur)
|
||||
|
||||
// Save the callee saved registers.
|
||||
cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs)
|
||||
|
||||
if needModuleContextPtr {
|
||||
moduleCtrPtr := rbxVReg // Module context is always the second argument.
|
||||
mem := m.newAmodeImmReg(
|
||||
wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.U32(),
|
||||
execCtrPtr)
|
||||
store := m.allocateInstr().asMovRM(moduleCtrPtr, newOperandMem(mem), 8)
|
||||
cur = linkInstr(cur, store)
|
||||
}
|
||||
|
||||
// Now let's advance the RSP to the stack slot for the arguments.
|
||||
//
|
||||
// (high address) (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | =======> | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | Return Addr | | Return Addr |
|
||||
// | Caller_RBP | | Caller_RBP |
|
||||
// RBP,RSP --> +-----------------+ +-----------------+ <----- RBP
|
||||
// (low address) | arg[N]/ret[M] |
|
||||
// | .......... |
|
||||
// | arg[1]/ret[1] |
|
||||
// | arg[0]/ret[0] |
|
||||
// +-----------------+ <----- RSP
|
||||
// (low address)
|
||||
//
|
||||
// where the region of "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go functions,
|
||||
// therefore will be accessed as the usual []uint64. So that's where we need to pass/receive
|
||||
// the arguments/return values to/from the Go function.
|
||||
cur = m.addRSP(-int32(goSliceSizeAligned), cur)
|
||||
|
||||
// Next, we need to store all the arguments to the stack in the typical Wasm stack style.
|
||||
var offsetInGoSlice int32
|
||||
for i := range abi.Args[argBegin:] {
|
||||
arg := &abi.Args[argBegin+i]
|
||||
var v regalloc.VReg
|
||||
if arg.Kind == backend.ABIArgKindReg {
|
||||
v = arg.Reg
|
||||
} else {
|
||||
// We have saved callee saved registers, so we can use them.
|
||||
if arg.Type.IsInt() {
|
||||
v = r15VReg
|
||||
} else {
|
||||
v = xmm15VReg
|
||||
}
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
|
||||
load := m.allocateInstr()
|
||||
switch arg.Type {
|
||||
case ssa.TypeI32:
|
||||
load.asMovzxRmR(extModeLQ, mem, v)
|
||||
case ssa.TypeI64:
|
||||
load.asMov64MR(mem, v)
|
||||
case ssa.TypeF32:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
|
||||
case ssa.TypeF64:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
|
||||
case ssa.TypeV128:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
cur = linkInstr(cur, load)
|
||||
}
|
||||
|
||||
store := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
|
||||
switch arg.Type {
|
||||
case ssa.TypeI32:
|
||||
store.asMovRM(v, mem, 4)
|
||||
offsetInGoSlice += 8 // always uint64 rep.
|
||||
case ssa.TypeI64:
|
||||
store.asMovRM(v, mem, 8)
|
||||
offsetInGoSlice += 8
|
||||
case ssa.TypeF32:
|
||||
store.asXmmMovRM(sseOpcodeMovss, v, mem)
|
||||
offsetInGoSlice += 8 // always uint64 rep.
|
||||
case ssa.TypeF64:
|
||||
store.asXmmMovRM(sseOpcodeMovsd, v, mem)
|
||||
offsetInGoSlice += 8
|
||||
case ssa.TypeV128:
|
||||
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
|
||||
offsetInGoSlice += 16
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
cur = linkInstr(cur, store)
|
||||
}
|
||||
|
||||
// Finally we push the size of the slice to the stack so the stack looks like:
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | Return Addr |
|
||||
// | Caller_RBP |
|
||||
// +-----------------+ <----- RBP
|
||||
// | arg[N]/ret[M] |
|
||||
// | .......... |
|
||||
// | arg[1]/ret[1] |
|
||||
// | arg[0]/ret[0] |
|
||||
// | slice size |
|
||||
// +-----------------+ <----- RSP
|
||||
// (low address)
|
||||
//
|
||||
// push $sliceSize
|
||||
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandImm32(uint32(goSliceSizeAlignedUnaligned))))
|
||||
|
||||
// Load the exitCode to the register.
|
||||
exitCodeReg := r12VReg // Callee saved which is already saved.
|
||||
cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(exitCode), false))
|
||||
|
||||
saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg)
|
||||
cur = linkInstr(cur, setExitCode)
|
||||
cur = linkInstr(cur, saveRsp)
|
||||
cur = linkInstr(cur, saveRbp)
|
||||
|
||||
// Ready to exit the execution.
|
||||
cur = m.storeReturnAddressAndExit(cur, execCtrPtr)
|
||||
|
||||
// We don't need the slice size anymore, so pop it.
|
||||
cur = m.addRSP(8, cur)
|
||||
|
||||
// Ready to set up the results.
|
||||
offsetInGoSlice = 0
|
||||
// To avoid the result clobbering the execution context pointer, we track its offset here,
// and defer loading that result into its register until the end of this function.
|
||||
var argOverlapWithExecCtxOffset int32 = -1
|
||||
for i := range abi.Rets {
|
||||
r := &abi.Rets[i]
|
||||
var v regalloc.VReg
|
||||
isRegResult := r.Kind == backend.ABIArgKindReg
|
||||
if isRegResult {
|
||||
v = r.Reg
|
||||
if v.RealReg() == execCtrPtr.RealReg() {
|
||||
argOverlapWithExecCtxOffset = offsetInGoSlice
|
||||
offsetInGoSlice += 8 // always uint64 rep.
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
if r.Type.IsInt() {
|
||||
v = r15VReg
|
||||
} else {
|
||||
v = xmm15VReg
|
||||
}
|
||||
}
|
||||
|
||||
load := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
|
||||
switch r.Type {
|
||||
case ssa.TypeI32:
|
||||
load.asMovzxRmR(extModeLQ, mem, v)
|
||||
offsetInGoSlice += 8 // always uint64 rep.
|
||||
case ssa.TypeI64:
|
||||
load.asMov64MR(mem, v)
|
||||
offsetInGoSlice += 8
|
||||
case ssa.TypeF32:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
|
||||
offsetInGoSlice += 8 // always uint64 rep.
|
||||
case ssa.TypeF64:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
|
||||
offsetInGoSlice += 8
|
||||
case ssa.TypeV128:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
|
||||
offsetInGoSlice += 16
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
cur = linkInstr(cur, load)
|
||||
|
||||
if !isRegResult {
|
||||
// We need to store it back to the result slot above rbp.
|
||||
store := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(abi.ArgStackSize+r.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
|
||||
switch r.Type {
|
||||
case ssa.TypeI32:
|
||||
store.asMovRM(v, mem, 4)
|
||||
case ssa.TypeI64:
|
||||
store.asMovRM(v, mem, 8)
|
||||
case ssa.TypeF32:
|
||||
store.asXmmMovRM(sseOpcodeMovss, v, mem)
|
||||
case ssa.TypeF64:
|
||||
store.asXmmMovRM(sseOpcodeMovsd, v, mem)
|
||||
case ssa.TypeV128:
|
||||
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
cur = linkInstr(cur, store)
|
||||
}
|
||||
}
|
||||
|
||||
// Before return, we need to restore the callee saved registers.
|
||||
cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs)
|
||||
|
||||
if argOverlapWithExecCtxOffset >= 0 {
|
||||
// At this point execCtx is not used anymore, so we can finally store the
|
||||
// result to the register which overlaps with the execution context pointer.
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(argOverlapWithExecCtxOffset), rspVReg))
|
||||
load := m.allocateInstr().asMov64MR(mem, execCtrPtr)
|
||||
cur = linkInstr(cur, load)
|
||||
}
|
||||
|
||||
// Finally ready to return.
|
||||
cur = m.revertRBPRSP(cur)
|
||||
linkInstr(cur, m.allocateInstr().asRet())
|
||||
|
||||
m.encodeWithoutSSA(ectx.RootInstr)
|
||||
return m.c.Buf()
|
||||
}
|
||||
|
||||
func (m *machine) saveRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
|
||||
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
|
||||
for _, v := range regs {
|
||||
store := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx))
|
||||
switch v.RegType() {
|
||||
case regalloc.RegTypeInt:
|
||||
store.asMovRM(v, mem, 8)
|
||||
case regalloc.RegTypeFloat:
|
||||
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
cur = linkInstr(cur, store)
|
||||
offset += 16 // See execution context struct. Each register is 16 bytes-aligned unconditionally.
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) restoreRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
|
||||
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
|
||||
for _, v := range regs {
|
||||
load := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx))
|
||||
switch v.RegType() {
|
||||
case regalloc.RegTypeInt:
|
||||
load.asMov64MR(mem, v)
|
||||
case regalloc.RegTypeFloat:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
cur = linkInstr(cur, load)
|
||||
offset += 16 // See execution context struct. Each register is 16 bytes-aligned unconditionally.
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) storeReturnAddressAndExit(cur *instruction, execCtx regalloc.VReg) *instruction {
|
||||
readRip := m.allocateInstr()
|
||||
cur = linkInstr(cur, readRip)
|
||||
|
||||
ripReg := r12VReg // Callee saved which is already saved.
|
||||
saveRip := m.allocateInstr().asMovRM(
|
||||
ripReg,
|
||||
newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetGoCallReturnAddress.U32(), execCtx)),
|
||||
8,
|
||||
)
|
||||
cur = linkInstr(cur, saveRip)
|
||||
|
||||
exit := m.allocateExitSeq(execCtx)
|
||||
cur = linkInstr(cur, exit)
|
||||
|
||||
nop, l := m.allocateBrTarget()
|
||||
cur = linkInstr(cur, nop)
|
||||
readRip.asLEA(newOperandLabel(l), ripReg)
|
||||
return cur
|
||||
}
|
||||
|
||||
// stackGrowSaveVRegs is the set of registers that must be saved/restored when growing the stack because there's insufficient
// stack space left. Basically these are all the allocatable registers except for RSP, RBP, and RAX, which contains the
// execution context pointer. The ExecCtx pointer is always the first argument so we don't need to save it.
|
||||
var stackGrowSaveVRegs = []regalloc.VReg{
|
||||
rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
|
||||
rcxVReg, rbxVReg, rsiVReg, rdiVReg, r8VReg, r9VReg, r10VReg, r11VReg,
|
||||
xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
|
||||
xmm0VReg, xmm1VReg, xmm2VReg, xmm3VReg, xmm4VReg, xmm5VReg, xmm6VReg, xmm7VReg,
|
||||
}
|
||||
|
||||
// CompileStackGrowCallSequence implements backend.Machine.
|
||||
func (m *machine) CompileStackGrowCallSequence() []byte {
|
||||
ectx := m.ectx
|
||||
|
||||
cur := m.allocateNop()
|
||||
ectx.RootInstr = cur
|
||||
|
||||
cur = m.setupRBPRSP(cur)
|
||||
|
||||
// Execution context is always the first argument.
|
||||
execCtrPtr := raxVReg
|
||||
|
||||
// Save the callee saved and argument registers.
|
||||
cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs)
|
||||
|
||||
// Load the exitCode to the register.
|
||||
exitCodeReg := r12VReg // Already saved.
|
||||
cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(wazevoapi.ExitCodeGrowStack), false))
|
||||
|
||||
saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg)
|
||||
cur = linkInstr(cur, setExitCode)
|
||||
cur = linkInstr(cur, saveRsp)
|
||||
cur = linkInstr(cur, saveRbp)
|
||||
|
||||
// Ready to exit the execution.
|
||||
cur = m.storeReturnAddressAndExit(cur, execCtrPtr)
|
||||
|
||||
// After the exit, restore the saved registers.
|
||||
cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs)
|
||||
|
||||
// Finally ready to return.
|
||||
cur = m.revertRBPRSP(cur)
|
||||
linkInstr(cur, m.allocateInstr().asRet())
|
||||
|
||||
m.encodeWithoutSSA(ectx.RootInstr)
|
||||
return m.c.Buf()
|
||||
}
|
||||
|
||||
// insertStackBoundsCheck will insert the instructions after `cur` to check the
|
||||
// stack bounds, and if there isn't sufficient space for the function,
// exit the execution and try growing it in the Go world.
|
||||
func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction {
|
||||
// sub $requiredStackSize, %rsp ;; Temporarily update the sp.
|
||||
// cmp ExecutionContextOffsetStackBottomPtr(%rax), %rsp ;; Compare the stack bottom and the sp.
|
||||
// ja .ok
|
||||
// add $requiredStackSize, %rsp ;; Reverse the temporary update.
|
||||
// pushq r15 ;; save the temporary.
|
||||
// mov $requiredStackSize, %r15
|
||||
// mov %r15, ExecutionContextOffsetStackGrowRequiredSize(%rax) ;; Set the required size in the execution context.
|
||||
// popq r15 ;; restore the temporary.
|
||||
// callq *ExecutionContextOffsetStackGrowCallTrampolineAddress(%rax) ;; Call the Go function to grow the stack.
|
||||
// jmp .cont
|
||||
// .ok:
|
||||
// add $requiredStackSize, %rsp ;; Reverse the temporary update.
|
||||
// .cont:
|
||||
cur = m.addRSP(-int32(requiredStackSize), cur)
|
||||
cur = linkInstr(cur, m.allocateInstr().asCmpRmiR(true,
|
||||
newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackBottomPtr.U32(), raxVReg)),
|
||||
rspVReg, true))
|
||||
|
||||
ja := m.allocateInstr()
|
||||
cur = linkInstr(cur, ja)
|
||||
|
||||
cur = m.addRSP(int32(requiredStackSize), cur)
|
||||
|
||||
// Save the temporary.
|
||||
|
||||
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r15VReg)))
|
||||
// Load the required size to the temporary.
|
||||
cur = linkInstr(cur, m.allocateInstr().asImm(r15VReg, uint64(requiredStackSize), true))
|
||||
// Set the required size in the execution context.
|
||||
cur = linkInstr(cur, m.allocateInstr().asMovRM(r15VReg,
|
||||
newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.U32(), raxVReg)), 8))
|
||||
// Restore the temporary.
|
||||
cur = linkInstr(cur, m.allocateInstr().asPop64(r15VReg))
|
||||
// Call the Go function to grow the stack.
|
||||
cur = linkInstr(cur, m.allocateInstr().asCallIndirect(newOperandMem(m.newAmodeImmReg(
|
||||
wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.U32(), raxVReg)), nil))
|
||||
// Jump to the continuation.
|
||||
jmpToCont := m.allocateInstr()
|
||||
cur = linkInstr(cur, jmpToCont)
|
||||
|
||||
// .ok:
|
||||
okInstr, ok := m.allocateBrTarget()
|
||||
cur = linkInstr(cur, okInstr)
|
||||
ja.asJmpIf(condNBE, newOperandLabel(ok))
|
||||
// On the ok path, we only need to reverse the temporary update.
|
||||
cur = m.addRSP(int32(requiredStackSize), cur)
|
||||
|
||||
// .cont:
|
||||
contInstr, cont := m.allocateBrTarget()
|
||||
cur = linkInstr(cur, contInstr)
|
||||
jmpToCont.asJmp(newOperandLabel(cont))
|
||||
|
||||
return cur
|
||||
}
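// For intuition, the check emitted above is equivalent to the following Go
// sketch (hypothetical helper, not part of the backend): the stack grows
// downwards, so a frame fits only if RSP minus the required size still sits
// strictly above the recorded stack bottom.
func needsGrowStack(rsp, stackBottom, requiredStackSize uintptr) bool {
	// The machine code temporarily performs rsp -= requiredStackSize, compares
	// it against the stack bottom with an unsigned "ja", and reverses the
	// update on both the grow path and the ok path.
	return rsp-requiredStackSize <= stackBottom
}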
|
168
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/cond.go
generated
vendored
Normal file
@ -0,0 +1,168 @@
|
||||
package amd64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
type cond byte
|
||||
|
||||
const (
|
||||
// condO represents (overflow) condition.
|
||||
condO cond = iota
|
||||
// condNO represents (no overflow) condition.
|
||||
condNO
|
||||
// condB represents (< unsigned) condition.
|
||||
condB
|
||||
// condNB represents (>= unsigned) condition.
|
||||
condNB
|
||||
// condZ represents (zero) condition.
|
||||
condZ
|
||||
// condNZ represents (not-zero) condition.
|
||||
condNZ
|
||||
// condBE represents (<= unsigned) condition.
|
||||
condBE
|
||||
// condNBE represents (> unsigned) condition.
|
||||
condNBE
|
||||
// condS represents (negative) condition.
|
||||
condS
|
||||
// condNS represents (not-negative) condition.
|
||||
condNS
|
||||
// condP represents (parity) condition.
|
||||
condP
|
||||
// condNP represents (not parity) condition.
|
||||
condNP
|
||||
// condL represents (< signed) condition.
|
||||
condL
|
||||
// condNL represents (>= signed) condition.
|
||||
condNL
|
||||
// condLE represents (<= signed) condition.
|
||||
condLE
|
||||
// condNLE represents (> signed) condition.
|
||||
condNLE
|
||||
|
||||
condInvalid
|
||||
)
|
||||
|
||||
func (c cond) String() string {
|
||||
switch c {
|
||||
case condO:
|
||||
return "o"
|
||||
case condNO:
|
||||
return "no"
|
||||
case condB:
|
||||
return "b"
|
||||
case condNB:
|
||||
return "nb"
|
||||
case condZ:
|
||||
return "z"
|
||||
case condNZ:
|
||||
return "nz"
|
||||
case condBE:
|
||||
return "be"
|
||||
case condNBE:
|
||||
return "nbe"
|
||||
case condS:
|
||||
return "s"
|
||||
case condNS:
|
||||
return "ns"
|
||||
case condL:
|
||||
return "l"
|
||||
case condNL:
|
||||
return "nl"
|
||||
case condLE:
|
||||
return "le"
|
||||
case condNLE:
|
||||
return "nle"
|
||||
case condP:
|
||||
return "p"
|
||||
case condNP:
|
||||
return "np"
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
func condFromSSAIntCmpCond(origin ssa.IntegerCmpCond) cond {
|
||||
switch origin {
|
||||
case ssa.IntegerCmpCondEqual:
|
||||
return condZ
|
||||
case ssa.IntegerCmpCondNotEqual:
|
||||
return condNZ
|
||||
case ssa.IntegerCmpCondSignedLessThan:
|
||||
return condL
|
||||
case ssa.IntegerCmpCondSignedGreaterThanOrEqual:
|
||||
return condNL
|
||||
case ssa.IntegerCmpCondSignedGreaterThan:
|
||||
return condNLE
|
||||
case ssa.IntegerCmpCondSignedLessThanOrEqual:
|
||||
return condLE
|
||||
case ssa.IntegerCmpCondUnsignedLessThan:
|
||||
return condB
|
||||
case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual:
|
||||
return condNB
|
||||
case ssa.IntegerCmpCondUnsignedGreaterThan:
|
||||
return condNBE
|
||||
case ssa.IntegerCmpCondUnsignedLessThanOrEqual:
|
||||
return condBE
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
func condFromSSAFloatCmpCond(origin ssa.FloatCmpCond) cond {
|
||||
switch origin {
|
||||
case ssa.FloatCmpCondGreaterThanOrEqual:
|
||||
return condNB
|
||||
case ssa.FloatCmpCondGreaterThan:
|
||||
return condNBE
|
||||
case ssa.FloatCmpCondEqual, ssa.FloatCmpCondNotEqual, ssa.FloatCmpCondLessThan, ssa.FloatCmpCondLessThanOrEqual:
|
||||
panic(fmt.Sprintf("cond %s must be treated as a special case", origin))
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
func (c cond) encoding() byte {
|
||||
return byte(c)
|
||||
}
|
||||
|
||||
func (c cond) invert() cond {
|
||||
switch c {
|
||||
case condO:
|
||||
return condNO
|
||||
case condNO:
|
||||
return condO
|
||||
case condB:
|
||||
return condNB
|
||||
case condNB:
|
||||
return condB
|
||||
case condZ:
|
||||
return condNZ
|
||||
case condNZ:
|
||||
return condZ
|
||||
case condBE:
|
||||
return condNBE
|
||||
case condNBE:
|
||||
return condBE
|
||||
case condS:
|
||||
return condNS
|
||||
case condNS:
|
||||
return condS
|
||||
case condP:
|
||||
return condNP
|
||||
case condNP:
|
||||
return condP
|
||||
case condL:
|
||||
return condNL
|
||||
case condNL:
|
||||
return condL
|
||||
case condLE:
|
||||
return condNLE
|
||||
case condNLE:
|
||||
return condLE
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
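// Since the constants above follow the hardware condition-code encoding, each
// condition and its negation form an adjacent pair (O/NO, B/NB, Z/NZ, ...), so
// the same inversion can be expressed as flipping the lowest encoding bit. A
// sketch of that observation (not a replacement for invert above):
func invertByBitFlip(c cond) cond {
	if c >= condInvalid {
		panic("unreachable")
	}
	return c ^ 1 // e.g. condZ (0x4) <-> condNZ (0x5), condL (0xC) <-> condNL (0xD)
}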
|
35
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/ext.go
generated
vendored
Normal file
@ -0,0 +1,35 @@
|
||||
package amd64
|
||||
|
||||
// extMode represents the mode of extension in movzx/movsx.
|
||||
type extMode byte
|
||||
|
||||
const (
|
||||
// extModeBL represents Byte -> Longword.
|
||||
extModeBL extMode = iota
|
||||
// extModeBQ represents Byte -> Quadword.
|
||||
extModeBQ
|
||||
// extModeWL represents Word -> Longword.
|
||||
extModeWL
|
||||
// extModeWQ represents Word -> Quadword.
|
||||
extModeWQ
|
||||
// extModeLQ represents Longword -> Quadword.
|
||||
extModeLQ
|
||||
)
|
||||
|
||||
// String implements fmt.Stringer.
|
||||
func (e extMode) String() string {
|
||||
switch e {
|
||||
case extModeBL:
|
||||
return "bl"
|
||||
case extModeBQ:
|
||||
return "bq"
|
||||
case extModeWL:
|
||||
return "wl"
|
||||
case extModeWQ:
|
||||
return "wq"
|
||||
case extModeLQ:
|
||||
return "lq"
|
||||
default:
|
||||
panic("BUG: invalid ext mode")
|
||||
}
|
||||
}
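// For intuition, the integer semantics that movzx/movsx with extModeLQ stand
// for, written as plain Go conversions (a standalone sketch, not used by the
// backend):
func zeroExtendLQ(v uint32) uint64 { return uint64(v) }               // movzx: upper 32 bits cleared
func signExtendLQ(v uint32) uint64 { return uint64(int64(int32(v))) } // movsx: bit 31 replicated upwards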
|
2472
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go
generated
vendored
Normal file
File diff suppressed because it is too large
1683
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go
generated
vendored
Normal file
File diff suppressed because it is too large
71
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_constant.go
generated
vendored
Normal file
@ -0,0 +1,71 @@
|
||||
package amd64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
// lowerConstant allocates a new VReg and inserts the instruction to load the constant value.
|
||||
func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
|
||||
val := instr.Return()
|
||||
valType := val.Type()
|
||||
|
||||
vr = m.c.AllocateVReg(valType)
|
||||
m.insertLoadConstant(instr, vr)
|
||||
return
|
||||
}
|
||||
|
||||
// InsertLoadConstantBlockArg implements backend.Machine.
|
||||
func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) {
|
||||
m.insertLoadConstant(instr, vr)
|
||||
}
|
||||
|
||||
func (m *machine) insertLoadConstant(instr *ssa.Instruction, vr regalloc.VReg) {
|
||||
val := instr.Return()
|
||||
valType := val.Type()
|
||||
v := instr.ConstantVal()
|
||||
|
||||
bits := valType.Bits()
|
||||
if bits < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc.
|
||||
v = v & ((1 << valType.Bits()) - 1)
|
||||
}
|
||||
|
||||
switch valType {
|
||||
case ssa.TypeF32, ssa.TypeF64:
|
||||
m.lowerFconst(vr, v, bits == 64)
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
m.lowerIconst(vr, v, bits == 64)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) lowerFconst(dst regalloc.VReg, c uint64, _64 bool) {
|
||||
if c == 0 {
|
||||
xor := m.allocateInstr().asZeros(dst)
|
||||
m.insert(xor)
|
||||
} else {
|
||||
var tmpType ssa.Type
|
||||
if _64 {
|
||||
tmpType = ssa.TypeI64
|
||||
} else {
|
||||
tmpType = ssa.TypeI32
|
||||
}
|
||||
tmpInt := m.c.AllocateVReg(tmpType)
|
||||
loadToGP := m.allocateInstr().asImm(tmpInt, c, _64)
|
||||
m.insert(loadToGP)
|
||||
|
||||
movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpInt), dst, _64)
|
||||
m.insert(movToXmm)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) lowerIconst(dst regalloc.VReg, c uint64, _64 bool) {
|
||||
i := m.allocateInstr()
|
||||
if c == 0 {
|
||||
i.asZeros(dst)
|
||||
} else {
|
||||
i.asImm(dst, c, _64)
|
||||
}
|
||||
m.insert(i)
|
||||
}
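// The masking in insertLoadConstant above keeps only the low valType.Bits()
// bits of a constant so that, e.g., a sign-extended 32-bit value does not leak
// its upper bits into the encoder. A standalone sketch of that normalization
// (hypothetical helper):
func maskToWidth(v uint64, bits uint) uint64 {
	if bits >= 64 {
		return v
	}
	return v & ((1 << bits) - 1) // e.g. maskToWidth(0xffff_ffff_ffff_fffe, 32) == 0xffff_fffe
}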
|
187
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go
generated
vendored
Normal file
@ -0,0 +1,187 @@
|
||||
package amd64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
var addendsMatchOpcodes = [...]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst, ssa.OpcodeIshl}
|
||||
|
||||
type addend struct {
|
||||
r regalloc.VReg
|
||||
off int64
|
||||
shift byte
|
||||
}
|
||||
|
||||
func (a addend) String() string {
|
||||
return fmt.Sprintf("addend{r=%s, off=%d, shift=%d}", a.r, a.off, a.shift)
|
||||
}
|
||||
|
||||
// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
|
||||
func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32) (am *amode) {
|
||||
def := m.c.ValueDefinition(ptr)
|
||||
|
||||
if offsetBase&0x80000000 != 0 {
|
||||
// Special casing the huge base offset whose MSB is set. In x64, the immediate is always
// sign-extended, but our IR semantics requires that the offset base is always unsigned.
// Note that this should be extremely rare, and may never be hit by a real application,
// so we don't bother optimizing this case.
|
||||
|
||||
a := m.lowerAddend(def)
|
||||
off64 := a.off + int64(offsetBase)
|
||||
offsetBaseReg := m.c.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerIconst(offsetBaseReg, uint64(off64), true)
|
||||
if a.r != regalloc.VRegInvalid {
|
||||
return m.newAmodeRegRegShift(0, offsetBaseReg, a.r, a.shift)
|
||||
} else {
|
||||
return m.newAmodeImmReg(0, offsetBaseReg)
|
||||
}
|
||||
}
|
||||
|
||||
if op := m.c.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op == ssa.OpcodeIadd {
|
||||
add := def.Instr
|
||||
x, y := add.Arg2()
|
||||
xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y)
|
||||
ax := m.lowerAddend(xDef)
|
||||
ay := m.lowerAddend(yDef)
|
||||
add.MarkLowered()
|
||||
return m.lowerAddendsToAmode(ax, ay, offsetBase)
|
||||
} else {
|
||||
// If it is not an Iadd, then we lower the one addend.
|
||||
a := m.lowerAddend(def)
|
||||
// off is always 0 if r is valid.
|
||||
if a.r != regalloc.VRegInvalid {
|
||||
if a.shift != 0 {
|
||||
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerIconst(tmpReg, 0, true)
|
||||
return m.newAmodeRegRegShift(offsetBase, tmpReg, a.r, a.shift)
|
||||
}
|
||||
return m.newAmodeImmReg(offsetBase, a.r)
|
||||
} else {
|
||||
off64 := a.off + int64(offsetBase)
|
||||
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerIconst(tmpReg, uint64(off64), true)
|
||||
return m.newAmodeImmReg(0, tmpReg)
|
||||
}
|
||||
}
|
||||
}
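// The special casing above exists because a 32-bit displacement is always
// sign-extended by the CPU when the effective address is computed, while the
// IR's offsetBase is unsigned. A standalone sketch of the effective-address
// arithmetic (hypothetical helper, not part of the backend):
func effectiveAddr(base, index uint64, shift byte, disp32 uint32) uint64 {
	return base + index<<shift + uint64(int64(int32(disp32))) // disp32 is sign-extended to 64 bits
}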
|
||||
|
||||
func (m *machine) lowerAddendsToAmode(x, y addend, offBase uint32) *amode {
|
||||
if x.r != regalloc.VRegInvalid && x.off != 0 || y.r != regalloc.VRegInvalid && y.off != 0 {
|
||||
panic("invalid input")
|
||||
}
|
||||
|
||||
u64 := uint64(x.off+y.off) + uint64(offBase)
|
||||
if u64 != 0 {
|
||||
if _, ok := asImm32(u64, false); !ok {
|
||||
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerIconst(tmpReg, u64, true)
|
||||
// Blank u64 as it has already been lowered.
|
||||
u64 = 0
|
||||
|
||||
if x.r == regalloc.VRegInvalid {
|
||||
x.r = tmpReg
|
||||
} else if y.r == regalloc.VRegInvalid {
|
||||
y.r = tmpReg
|
||||
} else {
|
||||
// At least one of x.r and y.r must be invalid here and would have been
// overwritten with the temporary register above, so reaching this branch is a bug.
|
||||
panic("BUG")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
u32 := uint32(u64)
|
||||
switch {
|
||||
// We assume rx, ry are valid iff offx, offy are 0.
|
||||
case x.r != regalloc.VRegInvalid && y.r != regalloc.VRegInvalid:
|
||||
switch {
|
||||
case x.shift != 0 && y.shift != 0:
|
||||
// Cannot absorb two shifted registers, must lower one to a shift instruction.
|
||||
shifted := m.allocateInstr()
|
||||
shifted.asShiftR(shiftROpShiftLeft, newOperandImm32(uint32(x.shift)), x.r, true)
|
||||
m.insert(shifted)
|
||||
|
||||
return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift)
|
||||
case x.shift != 0 && y.shift == 0:
|
||||
// Swap base and index.
|
||||
x, y = y, x
|
||||
fallthrough
|
||||
default:
|
||||
return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift)
|
||||
}
|
||||
case x.r == regalloc.VRegInvalid && y.r != regalloc.VRegInvalid:
|
||||
x, y = y, x
|
||||
fallthrough
|
||||
case x.r != regalloc.VRegInvalid && y.r == regalloc.VRegInvalid:
|
||||
if x.shift != 0 {
|
||||
zero := m.c.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerIconst(zero, 0, true)
|
||||
return m.newAmodeRegRegShift(u32, zero, x.r, x.shift)
|
||||
}
|
||||
return m.newAmodeImmReg(u32, x.r)
|
||||
default: // Both are invalid: use the offset.
|
||||
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerIconst(tmpReg, u64, true)
|
||||
return m.newAmodeImmReg(0, tmpReg)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) lowerAddend(x *backend.SSAValueDefinition) addend {
|
||||
if x.IsFromBlockParam() {
|
||||
return addend{x.BlkParamVReg, 0, 0}
|
||||
}
|
||||
// Ensure the addend is not referenced in multiple places; we will discard nested Iadds.
|
||||
op := m.c.MatchInstrOneOf(x, addendsMatchOpcodes[:])
|
||||
if op != ssa.OpcodeInvalid && op != ssa.OpcodeIadd {
|
||||
return m.lowerAddendFromInstr(x.Instr)
|
||||
}
|
||||
p := m.getOperand_Reg(x)
|
||||
return addend{p.reg(), 0, 0}
|
||||
}
|
||||
|
||||
// lowerAddendFromInstr takes an instruction and returns a VReg and an offset that can be used in an address mode.
// The VReg is regalloc.VRegInvalid if the addend cannot be lowered to a register.
|
||||
// The offset is 0 if the addend can be lowered to a register.
|
||||
func (m *machine) lowerAddendFromInstr(instr *ssa.Instruction) addend {
|
||||
instr.MarkLowered()
|
||||
switch op := instr.Opcode(); op {
|
||||
case ssa.OpcodeIconst:
|
||||
u64 := instr.ConstantVal()
|
||||
if instr.Return().Type().Bits() == 32 {
|
||||
return addend{regalloc.VRegInvalid, int64(int32(u64)), 0} // sign-extend.
|
||||
} else {
|
||||
return addend{regalloc.VRegInvalid, int64(u64), 0}
|
||||
}
|
||||
case ssa.OpcodeUExtend, ssa.OpcodeSExtend:
|
||||
input := instr.Arg()
|
||||
inputDef := m.c.ValueDefinition(input)
|
||||
if input.Type().Bits() != 32 {
|
||||
panic("BUG: invalid input type " + input.Type().String())
|
||||
}
|
||||
constInst := inputDef.IsFromInstr() && inputDef.Instr.Constant()
|
||||
switch {
|
||||
case constInst && op == ssa.OpcodeSExtend:
|
||||
return addend{regalloc.VRegInvalid, int64(uint32(inputDef.Instr.ConstantVal())), 0}
|
||||
case constInst && op == ssa.OpcodeUExtend:
|
||||
return addend{regalloc.VRegInvalid, int64(int32(inputDef.Instr.ConstantVal())), 0} // sign-extend!
|
||||
default:
|
||||
r := m.getOperand_Reg(inputDef)
|
||||
return addend{r.reg(), 0, 0}
|
||||
}
|
||||
case ssa.OpcodeIshl:
|
||||
// If the addend is a shift, we can only handle it if the shift amount is a constant.
|
||||
x, amount := instr.Arg2()
|
||||
amountDef := m.c.ValueDefinition(amount)
|
||||
if amountDef.IsFromInstr() && amountDef.Instr.Constant() && amountDef.Instr.ConstantVal() <= 3 {
|
||||
r := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
return addend{r.reg(), 0, uint8(amountDef.Instr.ConstantVal())}
|
||||
}
|
||||
r := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
return addend{r.reg(), 0, 0}
|
||||
}
|
||||
panic("BUG: invalid opcode")
|
||||
}
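// The "<= 3" check above mirrors the x86 SIB encoding: an index register can
// only be scaled by 1, 2, 4 or 8, i.e. shifted left by at most 3 bits. A tiny
// sketch of that constraint (hypothetical helper):
func shiftFitsSIBScale(amount uint64) bool {
	return amount <= 3 // scale = 1 << amount, so at most 8
}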
|
3611
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
generated
vendored
Normal file
File diff suppressed because it is too large
304
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go
generated
vendored
Normal file
@ -0,0 +1,304 @@
|
||||
package amd64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
)
|
||||
|
||||
// PostRegAlloc implements backend.Machine.
|
||||
func (m *machine) PostRegAlloc() {
|
||||
m.setupPrologue()
|
||||
m.postRegAlloc()
|
||||
}
|
||||
|
||||
func (m *machine) setupPrologue() {
|
||||
cur := m.ectx.RootInstr
|
||||
prevInitInst := cur.next
|
||||
|
||||
// At this point, we have the stack layout as follows:
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+ <----- RBP (somewhere in the middle of the stack)
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | Return Addr |
|
||||
// RSP ----> +-----------------+
|
||||
// (low address)
|
||||
|
||||
// First, we push the RBP, and update the RBP to the current RSP.
|
||||
//
|
||||
// (high address) (high address)
|
||||
// RBP ----> +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | ====> | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | Return Addr | | Return Addr |
|
||||
// RSP ----> +-----------------+ | Caller_RBP |
|
||||
// (low address) +-----------------+ <----- RSP, RBP
|
||||
//
|
||||
cur = m.setupRBPRSP(cur)
|
||||
|
||||
if !m.stackBoundsCheckDisabled {
|
||||
cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur)
|
||||
}
|
||||
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | xxxxx | | xxxxx |
|
||||
// | Return Addr | | Return Addr |
|
||||
// | Caller_RBP | ====> | Caller_RBP |
|
||||
// RBP,RSP->+-----------------+ +-----------------+ <----- RBP
|
||||
// (low address) | clobbered M |
|
||||
// | clobbered 1 |
|
||||
// | ........... |
|
||||
// | clobbered 0 |
|
||||
// +-----------------+ <----- RSP
|
||||
//
|
||||
if regs := m.clobberedRegs; len(regs) > 0 {
|
||||
for i := range regs {
|
||||
r := regs[len(regs)-1-i] // Reverse order.
|
||||
if r.RegType() == regalloc.RegTypeInt {
|
||||
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r)))
|
||||
} else {
|
||||
// Pushing an XMM register is not supported by the PUSH instruction.
|
||||
cur = m.addRSP(-16, cur)
|
||||
push := m.allocateInstr().asXmmMovRM(
|
||||
sseOpcodeMovdqu, r, newOperandMem(m.newAmodeImmReg(0, rspVReg)),
|
||||
)
|
||||
cur = linkInstr(cur, push)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if size := m.spillSlotSize; size > 0 {
|
||||
// Simply decrease the RSP to allocate the spill slots.
|
||||
// sub $size, %rsp
|
||||
cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(size)), rspVReg, true))
|
||||
|
||||
// At this point, we have the stack layout as follows:
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | ReturnAddress |
|
||||
// | Caller_RBP |
|
||||
// +-----------------+ <--- RBP
|
||||
// | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 1 |
|
||||
// | clobbered 0 |
|
||||
// | spill slot N |
|
||||
// | ............ |
|
||||
// | spill slot 0 |
|
||||
// +-----------------+ <--- RSP
|
||||
// (low address)
|
||||
}
|
||||
|
||||
linkInstr(cur, prevInitInst)
|
||||
}
|
||||
|
||||
// postRegAlloc does multiple things while walking through the instructions:
|
||||
// 1. Inserts the epilogue code.
|
||||
// 2. Removes the redundant copy instruction.
|
||||
// 3. Inserts the dec/inc RSP instruction right before/after the call instruction.
|
||||
// 4. Lowering that is supposed to be done after regalloc.
|
||||
func (m *machine) postRegAlloc() {
|
||||
ectx := m.ectx
|
||||
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
|
||||
switch k := cur.kind; k {
|
||||
case ret:
|
||||
m.setupEpilogueAfter(cur.prev)
|
||||
continue
|
||||
case fcvtToSintSequence, fcvtToUintSequence:
|
||||
m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0]
|
||||
if k == fcvtToSintSequence {
|
||||
m.lowerFcvtToSintSequenceAfterRegalloc(cur)
|
||||
} else {
|
||||
m.lowerFcvtToUintSequenceAfterRegalloc(cur)
|
||||
}
|
||||
prev := cur.prev
|
||||
next := cur.next
|
||||
cur := prev
|
||||
for _, instr := range m.ectx.PendingInstructions {
|
||||
cur = linkInstr(cur, instr)
|
||||
}
|
||||
linkInstr(cur, next)
|
||||
continue
|
||||
case xmmCMov:
|
||||
m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0]
|
||||
m.lowerXmmCmovAfterRegAlloc(cur)
|
||||
prev := cur.prev
|
||||
next := cur.next
|
||||
cur := prev
|
||||
for _, instr := range m.ectx.PendingInstructions {
|
||||
cur = linkInstr(cur, instr)
|
||||
}
|
||||
linkInstr(cur, next)
|
||||
continue
|
||||
case idivRemSequence:
|
||||
m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0]
|
||||
m.lowerIDivRemSequenceAfterRegAlloc(cur)
|
||||
prev := cur.prev
|
||||
next := cur.next
|
||||
cur := prev
|
||||
for _, instr := range m.ectx.PendingInstructions {
|
||||
cur = linkInstr(cur, instr)
|
||||
}
|
||||
linkInstr(cur, next)
|
||||
continue
|
||||
case call, callIndirect:
|
||||
// At this point, reg alloc is done, therefore we can safely insert dec/inc RSP instructions
|
||||
// right before/after the call instruction. If this is done before reg alloc, the stack slot
|
||||
// can point to the wrong location and therefore results in a wrong value.
|
||||
call := cur
|
||||
next := call.next
|
||||
_, _, _, _, size := backend.ABIInfoFromUint64(call.u2)
|
||||
if size > 0 {
|
||||
dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true)
|
||||
linkInstr(call.prev, dec)
|
||||
linkInstr(dec, call)
|
||||
inc := m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(size), rspVReg, true)
|
||||
linkInstr(call, inc)
|
||||
linkInstr(inc, next)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Removes the redundant copy instruction.
|
||||
if cur.IsCopy() && cur.op1.reg().RealReg() == cur.op2.reg().RealReg() {
|
||||
prev, next := cur.prev, cur.next
|
||||
// Remove the copy instruction.
|
||||
prev.next = next
|
||||
if next != nil {
|
||||
next.prev = prev
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) setupEpilogueAfter(cur *instruction) {
|
||||
prevNext := cur.next
|
||||
|
||||
// At this point, we have the stack layout as follows:
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | ReturnAddress |
|
||||
// | Caller_RBP |
|
||||
// +-----------------+ <--- RBP
|
||||
// | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 1 |
|
||||
// | clobbered 0 |
|
||||
// | spill slot N |
|
||||
// | ............ |
|
||||
// | spill slot 0 |
|
||||
// +-----------------+ <--- RSP
|
||||
// (low address)
|
||||
|
||||
if size := m.spillSlotSize; size > 0 {
|
||||
// Simply increase the RSP to free the spill slots.
|
||||
// add $size, %rsp
|
||||
cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(uint32(size)), rspVReg, true))
|
||||
}
|
||||
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | ReturnAddress | | ReturnAddress |
|
||||
// | Caller_RBP | | Caller_RBP |
|
||||
// RBP ---> +-----------------+ ========> +-----------------+ <---- RSP, RBP
|
||||
// | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 1 |
|
||||
// | clobbered 0 |
|
||||
// RSP ---> +-----------------+
|
||||
// (low address)
|
||||
//
|
||||
if regs := m.clobberedRegs; len(regs) > 0 {
|
||||
for _, r := range regs {
|
||||
if r.RegType() == regalloc.RegTypeInt {
|
||||
cur = linkInstr(cur, m.allocateInstr().asPop64(r))
|
||||
} else {
|
||||
// Popping an XMM register is not supported by the POP instruction.
|
||||
pop := m.allocateInstr().asXmmUnaryRmR(
|
||||
sseOpcodeMovdqu, newOperandMem(m.newAmodeImmReg(0, rspVReg)), r,
|
||||
)
|
||||
cur = linkInstr(cur, pop)
|
||||
cur = m.addRSP(16, cur)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now roll back the RSP to RBP, and pop the caller's RBP.
|
||||
cur = m.revertRBPRSP(cur)
|
||||
|
||||
linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
func (m *machine) addRSP(offset int32, cur *instruction) *instruction {
|
||||
if offset == 0 {
|
||||
return cur
|
||||
}
|
||||
opcode := aluRmiROpcodeAdd
|
||||
if offset < 0 {
|
||||
opcode = aluRmiROpcodeSub
|
||||
offset = -offset
|
||||
}
|
||||
return linkInstr(cur, m.allocateInstr().asAluRmiR(opcode, newOperandImm32(uint32(offset)), rspVReg, true))
|
||||
}
|
||||
|
||||
func (m *machine) setupRBPRSP(cur *instruction) *instruction {
|
||||
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(rbpVReg)))
|
||||
cur = linkInstr(cur, m.allocateInstr().asMovRR(rspVReg, rbpVReg, true))
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) revertRBPRSP(cur *instruction) *instruction {
|
||||
cur = linkInstr(cur, m.allocateInstr().asMovRR(rbpVReg, rspVReg, true))
|
||||
cur = linkInstr(cur, m.allocateInstr().asPop64(rbpVReg))
|
||||
return cur
|
||||
}
|
153
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go
generated
vendored
Normal file
@ -0,0 +1,153 @@
|
||||
package amd64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
// InsertMoveBefore implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) {
|
||||
typ := src.RegType()
|
||||
if typ != dst.RegType() {
|
||||
panic("BUG: src and dst must have the same type")
|
||||
}
|
||||
|
||||
mov := m.allocateInstr()
|
||||
if typ == regalloc.RegTypeInt {
|
||||
mov.asMovRR(src, dst, true)
|
||||
} else {
|
||||
mov.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandReg(src), dst)
|
||||
}
|
||||
|
||||
cur := instr.prev
|
||||
prevNext := cur.next
|
||||
cur = linkInstr(cur, mov)
|
||||
linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
|
||||
if !v.IsRealReg() {
|
||||
panic("BUG: VReg must be backed by real reg to be stored")
|
||||
}
|
||||
|
||||
typ := m.c.TypeOf(v)
|
||||
|
||||
var prevNext, cur *instruction
|
||||
if after {
|
||||
cur, prevNext = instr, instr.next
|
||||
} else {
|
||||
cur, prevNext = instr.prev, instr
|
||||
}
|
||||
|
||||
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
|
||||
store := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg))
|
||||
switch typ {
|
||||
case ssa.TypeI32:
|
||||
store.asMovRM(v, mem, 4)
|
||||
case ssa.TypeI64:
|
||||
store.asMovRM(v, mem, 8)
|
||||
case ssa.TypeF32:
|
||||
store.asXmmMovRM(sseOpcodeMovss, v, mem)
|
||||
case ssa.TypeF64:
|
||||
store.asXmmMovRM(sseOpcodeMovsd, v, mem)
|
||||
case ssa.TypeV128:
|
||||
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
|
||||
}
|
||||
|
||||
cur = linkInstr(cur, store)
|
||||
return linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
|
||||
if !v.IsRealReg() {
|
||||
panic("BUG: VReg must be backed by real reg to be stored")
|
||||
}
|
||||
|
||||
typ := m.c.TypeOf(v)
|
||||
var prevNext, cur *instruction
|
||||
if after {
|
||||
cur, prevNext = instr, instr.next
|
||||
} else {
|
||||
cur, prevNext = instr.prev, instr
|
||||
}
|
||||
|
||||
// Load the value to the temporary.
|
||||
load := m.allocateInstr()
|
||||
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
|
||||
a := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg))
|
||||
switch typ {
|
||||
case ssa.TypeI32:
|
||||
load.asMovzxRmR(extModeLQ, a, v)
|
||||
case ssa.TypeI64:
|
||||
load.asMov64MR(a, v)
|
||||
case ssa.TypeF32:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovss, a, v)
|
||||
case ssa.TypeF64:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovsd, a, v)
|
||||
case ssa.TypeV128:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovdqu, a, v)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
|
||||
cur = linkInstr(cur, load)
|
||||
return linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// ClobberedRegisters implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) ClobberedRegisters(regs []regalloc.VReg) {
|
||||
m.clobberedRegs = append(m.clobberedRegs[:0], regs...)
|
||||
}
|
||||
|
||||
// Swap implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {
|
||||
if x1.RegType() == regalloc.RegTypeInt {
|
||||
prevNext := cur.next
|
||||
xc := m.allocateInstr().asXCHG(x1, newOperandReg(x2), 8)
|
||||
cur = linkInstr(cur, xc)
|
||||
linkInstr(cur, prevNext)
|
||||
} else {
|
||||
if tmp.Valid() {
|
||||
prevNext := cur.next
|
||||
m.InsertMoveBefore(tmp, x1, prevNext)
|
||||
m.InsertMoveBefore(x1, x2, prevNext)
|
||||
m.InsertMoveBefore(x2, tmp, prevNext)
|
||||
} else {
|
||||
prevNext := cur.next
|
||||
r2 := x2.RealReg()
|
||||
// Temporarily spill x1 to stack.
|
||||
cur = m.InsertStoreRegisterAt(x1, cur, true).prev
|
||||
// Then move x2 to x1.
|
||||
cur = linkInstr(cur, m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqa, newOperandReg(x2), x1))
|
||||
linkInstr(cur, prevNext)
|
||||
// Then reload the original value on x1 from stack to r2.
|
||||
m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// LastInstrForInsertion implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction {
|
||||
cur := end
|
||||
for cur.kind == nop0 {
|
||||
cur = cur.prev
|
||||
if cur == begin {
|
||||
return end
|
||||
}
|
||||
}
|
||||
switch cur.kind {
|
||||
case jmp:
|
||||
return cur
|
||||
default:
|
||||
return end
|
||||
}
|
||||
}
|
||||
|
||||
// SSABlockLabel implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label {
|
||||
return m.ectx.SsaBlockIDToLabels[id]
|
||||
}
|
992
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go
generated
vendored
Normal file
@ -0,0 +1,992 @@
|
||||
package amd64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
var swizzleMask = [16]byte{
|
||||
0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
|
||||
0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
|
||||
}
|
||||
|
||||
func (m *machine) lowerSwizzle(x, y ssa.Value, ret ssa.Value) {
|
||||
masklabel := m.getOrAllocateConstLabel(&m.constSwizzleMaskConstIndex, swizzleMask[:])
|
||||
|
||||
// Load mask to maskReg.
|
||||
maskReg := m.c.AllocateVReg(ssa.TypeV128)
|
||||
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(masklabel)), maskReg)
|
||||
m.insert(loadMask)
|
||||
|
||||
// Copy x and y to tmp registers.
|
||||
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
tmpDst := m.copyToTmp(xx.reg())
|
||||
yy := m.getOperand_Reg(m.c.ValueDefinition(y))
|
||||
tmpX := m.copyToTmp(yy.reg())
|
||||
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddusb, newOperandReg(maskReg), tmpX))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpX), tmpDst))
|
||||
|
||||
// Copy the result to the destination register.
|
||||
m.copyTo(tmpDst, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
func (m *machine) lowerInsertLane(x, y ssa.Value, index byte, ret ssa.Value, lane ssa.VecLane) {
|
||||
// Copy x to tmp.
|
||||
tmpDst := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, m.getOperand_Mem_Reg(m.c.ValueDefinition(x)), tmpDst))
|
||||
|
||||
yy := m.getOperand_Reg(m.c.ValueDefinition(y))
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, index, yy, tmpDst))
|
||||
case ssa.VecLaneI16x8:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, index, yy, tmpDst))
|
||||
case ssa.VecLaneI32x4:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, index, yy, tmpDst))
|
||||
case ssa.VecLaneI64x2:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, index, yy, tmpDst))
|
||||
case ssa.VecLaneF32x4:
|
||||
// In the INSERTPS instruction, the destination index is encoded in bits 4 and 5 of the immediate.
|
||||
// See https://www.felixcloutier.com/x86/insertps
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeInsertps, index<<4, yy, tmpDst))
|
||||
case ssa.VecLaneF64x2:
|
||||
if index == 0 {
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, yy, tmpDst))
|
||||
} else {
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMovlhps, yy, tmpDst))
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
m.copyTo(tmpDst, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
func (m *machine) lowerExtractLane(x ssa.Value, index byte, signed bool, ret ssa.Value, lane ssa.VecLane) {
|
||||
// Pextr variants are used to extract a lane from a vector register.
|
||||
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
|
||||
tmpDst := m.c.AllocateVReg(ret.Type())
|
||||
m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst))
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrb, index, xx, tmpDst))
|
||||
if signed {
|
||||
m.insert(m.allocateInstr().asMovsxRmR(extModeBL, newOperandReg(tmpDst), tmpDst))
|
||||
} else {
|
||||
m.insert(m.allocateInstr().asMovzxRmR(extModeBL, newOperandReg(tmpDst), tmpDst))
|
||||
}
|
||||
case ssa.VecLaneI16x8:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrw, index, xx, tmpDst))
|
||||
if signed {
|
||||
m.insert(m.allocateInstr().asMovsxRmR(extModeWL, newOperandReg(tmpDst), tmpDst))
|
||||
} else {
|
||||
m.insert(m.allocateInstr().asMovzxRmR(extModeWL, newOperandReg(tmpDst), tmpDst))
|
||||
}
|
||||
case ssa.VecLaneI32x4:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrd, index, xx, tmpDst))
|
||||
case ssa.VecLaneI64x2:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, index, xx, tmpDst))
|
||||
case ssa.VecLaneF32x4:
|
||||
if index == 0 {
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovss, xx, tmpDst))
|
||||
} else {
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, index, xx, tmpDst))
|
||||
}
|
||||
case ssa.VecLaneF64x2:
|
||||
if index == 0 {
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, xx, tmpDst))
|
||||
} else {
|
||||
m.copyTo(xx.reg(), tmpDst)
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0b00_00_11_10, newOperandReg(tmpDst), tmpDst))
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
m.copyTo(tmpDst, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
var sqmulRoundSat = [16]byte{
|
||||
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
|
||||
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
|
||||
}
|
||||
|
||||
func (m *machine) lowerSqmulRoundSat(x, y, ret ssa.Value) {
|
||||
// See https://github.com/WebAssembly/simd/pull/365 for the following logic.
|
||||
maskLabel := m.getOrAllocateConstLabel(&m.constSqmulRoundSatIndex, sqmulRoundSat[:])
|
||||
|
||||
tmp := m.c.AllocateVReg(ssa.TypeV128)
|
||||
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp)
|
||||
m.insert(loadMask)
|
||||
|
||||
xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
|
||||
tmpX := m.copyToTmp(xx.reg())
|
||||
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmulhrsw, yy, tmpX))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmpX), tmp))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmpX))
|
||||
|
||||
m.copyTo(tmpX, m.c.VRegOf(ret))
|
||||
}
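// Illustrative sketch, not part of the vendored source: PMULHRSW computes
// (x*y*2 + 0x8000) >> 16 per i16 lane, which exceeds the int16 range only for
// x == y == -0x8000 (the raw result is then 0x8000); the compare/xor pair above flips
// exactly that lane to 0x7fff. A scalar model of one lane:
func q15MulRoundSatModel(x, y int16) int16 {
	r := (int64(x)*int64(y)*2 + 0x8000) >> 16
	if r == 0x8000 { // only reachable when both inputs are -0x8000
		return 0x7fff
	}
	return int16(r)
}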
|
||||
|
||||
func (m *machine) lowerVUshr(x, y, ret ssa.Value, lane ssa.VecLane) {
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
m.lowerVUshri8x16(x, y, ret)
|
||||
case ssa.VecLaneI16x8, ssa.VecLaneI32x4, ssa.VecLaneI64x2:
|
||||
m.lowerShr(x, y, ret, lane, false)
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
}
|
||||
|
||||
// i8x16LogicalSHRMaskTable is necessary for emulating non-existent packed byte logical right shifts on amd64.
|
||||
// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits.
|
||||
var i8x16LogicalSHRMaskTable = [8 * 16]byte{ // (the number of possible shift amounts: 0, 1, ..., 7) * 16 bytes.
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // for 1 shift
|
||||
0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, // for 2 shift
|
||||
0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, // for 3 shift
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, // for 4 shift
|
||||
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // for 5 shift
|
||||
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, // for 6 shift
|
||||
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // for 7 shift
|
||||
}
|
||||
|
||||
func (m *machine) lowerVUshri8x16(x, y, ret ssa.Value) {
|
||||
tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
|
||||
// Load the modulo 8 mask to tmpReg.
|
||||
m.lowerIconst(tmpGpReg, 0x7, false)
|
||||
// Take the modulo 8 of the shift amount.
|
||||
shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y))
|
||||
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, tmpGpReg, false))
|
||||
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
|
||||
vecTmp := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), vecTmp, false))
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrlw, newOperandReg(vecTmp), xx))
|
||||
|
||||
maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16LogicalSHRMaskTableIndex, i8x16LogicalSHRMaskTable[:])
|
||||
base := m.c.AllocateVReg(ssa.TypeI64)
|
||||
lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base)
|
||||
m.insert(lea)
|
||||
|
||||
// Shift tmpGpReg by 4 to multiply the shift amount by 16.
|
||||
m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false))
|
||||
|
||||
mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0)
|
||||
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), vecTmp)
|
||||
m.insert(loadMask)
|
||||
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(vecTmp), xx))
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
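// Illustrative sketch, not part of the vendored source: amd64 has no packed byte shift,
// so the code above shifts 16-bit lanes with PSRLW and then masks off the bits that leak
// in from the neighbouring byte, using the matching row of i8x16LogicalSHRMaskTable.
// A scalar model of one 16-bit lane holding two bytes:
func byteLogicalShrPairModel(lo, hi byte, shift uint) (byte, byte) {
	shift &= 7
	word := uint16(hi)<<8 | uint16(lo)
	shifted := word >> shift    // PSRLW on one 16-bit lane
	mask := byte(0xff) >> shift // the table row selected for this shift amount
	return byte(shifted) & mask, byte(shifted>>8) & mask
}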
|
||||
|
||||
func (m *machine) lowerVSshr(x, y, ret ssa.Value, lane ssa.VecLane) {
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
m.lowerVSshri8x16(x, y, ret)
|
||||
case ssa.VecLaneI16x8, ssa.VecLaneI32x4:
|
||||
m.lowerShr(x, y, ret, lane, true)
|
||||
case ssa.VecLaneI64x2:
|
||||
m.lowerVSshri64x2(x, y, ret)
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) lowerVSshri8x16(x, y, ret ssa.Value) {
|
||||
shiftAmtReg := m.c.AllocateVReg(ssa.TypeI32)
|
||||
// Load the modulo 8 mask to tmpReg.
|
||||
m.lowerIconst(shiftAmtReg, 0x7, false)
|
||||
// Take the modulo 8 of the shift amount.
|
||||
shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y))
|
||||
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, shiftAmtReg, false))
|
||||
|
||||
// Copy the x value to two temporary registers.
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
vecTmp := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.copyTo(xx, vecTmp)
|
||||
|
||||
// Assuming that we have
|
||||
// xx = [b1, ..., b16]
|
||||
// vecTmp = [b1, ..., b16]
|
||||
// at this point, then we use PUNPCKLBW and PUNPCKHBW to produce:
|
||||
// xx = [b1, b1, b2, b2, ..., b8, b8]
|
||||
// vecTmp = [b9, b9, b10, b10, ..., b16, b16]
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpcklbw, newOperandReg(xx), xx))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpckhbw, newOperandReg(vecTmp), vecTmp))
|
||||
|
||||
// Adding 8 to the shift amount, and then move the amount to vecTmp2.
|
||||
vecTmp2 := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(8), shiftAmtReg, false))
|
||||
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(shiftAmtReg), vecTmp2, false))
|
||||
|
||||
// Perform the word packed arithmetic right shifts on vreg and vecTmp.
|
||||
// This changes these two registers as:
|
||||
// xx = [xxx, b1 >> s, xxx, b2 >> s, ..., xxx, b8 >> s]
|
||||
// vecTmp = [xxx, b9 >> s, xxx, b10 >> s, ..., xxx, b16 >> s]
|
||||
// where xxx is 1 or 0 depending on each byte's sign, and ">>" is the arithmetic shift on a byte.
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), xx))
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), vecTmp))
|
||||
|
||||
// Finally, we can get the result by packing these two word vectors.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePacksswb, newOperandReg(vecTmp), xx))
|
||||
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
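// Illustrative sketch, not part of the vendored source: each byte above is widened to a
// word by pairing it with a copy of itself (PUNPCKLBW/PUNPCKHBW), shifted arithmetically
// by s+8 with PSRAW so sign bits fill in from the duplicated copy, and then narrowed back
// with PACKSSWB. A scalar model of a single byte:
func byteArithShrModel(b int8, shift uint) int8 {
	shift &= 7
	word := int16(b)<<8 | int16(uint8(b)) // [b, b] packed into one 16-bit lane
	return int8(word >> (shift + 8))      // PSRAW by shift+8, then take the low byte
}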
|
||||
|
||||
func (m *machine) lowerVSshri64x2(x, y, ret ssa.Value) {
|
||||
// Load the shift amount to RCX.
|
||||
shiftAmt := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
|
||||
m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, shiftAmt, rcxVReg))
|
||||
|
||||
tmpGp := m.c.AllocateVReg(ssa.TypeI64)
|
||||
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xxReg := m.copyToTmp(_xx.reg())
|
||||
|
||||
m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 0, newOperandReg(xxReg), tmpGp))
|
||||
m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), xxReg))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 1, newOperandReg(xxReg), tmpGp))
|
||||
m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), xxReg))
|
||||
|
||||
m.copyTo(xxReg, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
func (m *machine) lowerShr(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) {
|
||||
var modulo uint64
|
||||
var shiftOp sseOpcode
|
||||
switch lane {
|
||||
case ssa.VecLaneI16x8:
|
||||
modulo = 0xf
|
||||
if signed {
|
||||
shiftOp = sseOpcodePsraw
|
||||
} else {
|
||||
shiftOp = sseOpcodePsrlw
|
||||
}
|
||||
case ssa.VecLaneI32x4:
|
||||
modulo = 0x1f
|
||||
if signed {
|
||||
shiftOp = sseOpcodePsrad
|
||||
} else {
|
||||
shiftOp = sseOpcodePsrld
|
||||
}
|
||||
case ssa.VecLaneI64x2:
|
||||
modulo = 0x3f
|
||||
if signed {
|
||||
panic("BUG")
|
||||
}
|
||||
shiftOp = sseOpcodePsrlq
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
|
||||
tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
|
||||
// Load the shift-amount mask (lane bit-width - 1) to tmpGpReg.
|
||||
m.lowerIconst(tmpGpReg, modulo, false)
|
||||
// Take the shift amount modulo the lane bit-width.
|
||||
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd,
|
||||
m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false))
|
||||
// And move it to a xmm register.
|
||||
tmpVec := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false))
|
||||
|
||||
// Then do the actual shift.
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx))
|
||||
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
func (m *machine) lowerVIshl(x, y, ret ssa.Value, lane ssa.VecLane) {
|
||||
var modulo uint64
|
||||
var shiftOp sseOpcode
|
||||
var isI8x16 bool
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
isI8x16 = true
|
||||
modulo = 0x7
|
||||
shiftOp = sseOpcodePsllw
|
||||
case ssa.VecLaneI16x8:
|
||||
modulo = 0xf
|
||||
shiftOp = sseOpcodePsllw
|
||||
case ssa.VecLaneI32x4:
|
||||
modulo = 0x1f
|
||||
shiftOp = sseOpcodePslld
|
||||
case ssa.VecLaneI64x2:
|
||||
modulo = 0x3f
|
||||
shiftOp = sseOpcodePsllq
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
|
||||
tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
|
||||
// Load the shift-amount mask (lane bit-width - 1) to tmpGpReg.
|
||||
m.lowerIconst(tmpGpReg, modulo, false)
|
||||
// Take the shift amount modulo the lane bit-width.
|
||||
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd,
|
||||
m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false))
|
||||
// And move it to a xmm register.
|
||||
tmpVec := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false))
|
||||
|
||||
// Then do the actual shift.
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx))
|
||||
|
||||
if isI8x16 {
|
||||
maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16SHLMaskTableIndex, i8x16SHLMaskTable[:])
|
||||
base := m.c.AllocateVReg(ssa.TypeI64)
|
||||
lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base)
|
||||
m.insert(lea)
|
||||
|
||||
// Shift tmpGpReg by 4 to multiply the shift amount by 16.
|
||||
m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false))
|
||||
|
||||
mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0)
|
||||
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), tmpVec)
|
||||
m.insert(loadMask)
|
||||
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(tmpVec), xx))
|
||||
}
|
||||
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
// i8x16SHLMaskTable is necessary for emulating non-existent packed byte left shifts on amd64.
|
||||
// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits.
|
||||
var i8x16SHLMaskTable = [8 * 16]byte{ // (the number of possible shift amounts: 0, 1, ..., 7) * 16 bytes.
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift
|
||||
0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, // for 1 shift
|
||||
0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, // for 2 shift
|
||||
0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, // for 3 shift
|
||||
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // for 4 shift
|
||||
0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, // for 5 shift
|
||||
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, // for 6 shift
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, // for 7 shift
|
||||
}
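// Illustrative sketch, not part of the vendored source: the left-shift case mirrors the
// logical-right-shift emulation; bits that cross into the next byte during the PSLLW word
// shift are cleared by the matching row of i8x16SHLMaskTable. A scalar model of one
// 16-bit lane holding two bytes:
func byteShlPairModel(lo, hi byte, shift uint) (byte, byte) {
	shift &= 7
	word := uint16(hi)<<8 | uint16(lo)
	shifted := word << shift    // PSLLW on one 16-bit lane
	mask := byte(0xff) << shift // the table row selected for this shift amount
	return byte(shifted) & mask, byte(shifted>>8) & mask
}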
|
||||
|
||||
func (m *machine) lowerVRound(x, ret ssa.Value, imm byte, _64 bool) {
|
||||
xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
|
||||
var round sseOpcode
|
||||
if _64 {
|
||||
round = sseOpcodeRoundpd
|
||||
} else {
|
||||
round = sseOpcodeRoundps
|
||||
}
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmRImm(round, imm, xx, m.c.VRegOf(ret)))
|
||||
}
|
||||
|
||||
var (
|
||||
allOnesI8x16 = [16]byte{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}
|
||||
allOnesI16x8 = [16]byte{0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0}
|
||||
extAddPairwiseI16x8uMask1 = [16]byte{0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80}
|
||||
extAddPairwiseI16x8uMask2 = [16]byte{0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00}
|
||||
)
|
||||
|
||||
func (m *machine) lowerExtIaddPairwise(x, ret ssa.Value, srcLane ssa.VecLane, signed bool) {
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
switch srcLane {
|
||||
case ssa.VecLaneI8x16:
|
||||
allOneReg := m.c.AllocateVReg(ssa.TypeV128)
|
||||
mask := m.getOrAllocateConstLabel(&m.constAllOnesI8x16Index, allOnesI8x16[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOneReg))
|
||||
|
||||
var resultReg regalloc.VReg
|
||||
if signed {
|
||||
resultReg = allOneReg
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(xx), resultReg))
|
||||
} else {
|
||||
// Interpret tmp (all ones) as the signed-byte operand, so the multiply-add treats xx as unsigned.
|
||||
resultReg = xx
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(allOneReg), resultReg))
|
||||
}
|
||||
m.copyTo(resultReg, m.c.VRegOf(ret))
|
||||
|
||||
case ssa.VecLaneI16x8:
|
||||
if signed {
|
||||
allOnesReg := m.c.AllocateVReg(ssa.TypeV128)
|
||||
mask := m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOnesReg))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(allOnesReg), xx))
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
} else {
|
||||
maskReg := m.c.AllocateVReg(ssa.TypeV128)
|
||||
mask := m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask1Index, extAddPairwiseI16x8uMask1[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
|
||||
|
||||
// Flip the sign bits on xx.
|
||||
//
|
||||
// Assuming that xx = [w1, ..., w8], now we have,
|
||||
// xx[i] = int16(wi ^ 0x8000) = wi - 0x8000 for i = 1, ..., 8
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(maskReg), xx))
|
||||
|
||||
mask = m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
|
||||
|
||||
// For i = 0, ..., 3 (as this results in i32x4 lanes), we now have
|
||||
// xx[i] = int32((wn - 0x8000) + (w(n+1) - 0x8000)) = int32(wn + w(n+1)) - 0x10000
|
||||
// c.assembler.CompileRegisterToRegister(amd64.PMADDWD, tmp, vr)
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(maskReg), xx))
|
||||
|
||||
mask = m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask2Index, extAddPairwiseI16x8uMask2[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
|
||||
|
||||
// vr[i] = int32(wn + w(n+1)) - 0x10000 + 0x10000 = int32(wn + w(n+1)) = uint32(wn + w(n+1)).
|
||||
// c.assembler.CompileRegisterToRegister(amd64.PADDD, tmp, vr)
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(maskReg), xx))
|
||||
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", srcLane))
|
||||
}
|
||||
}
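// Illustrative sketch, not part of the vendored source: in the unsigned i16x8 case above,
// each lane is biased by xor-ing 0x8000 (i.e. treated as w - 0x8000 signed), the signed
// pairwise add is done with PMADDWD against all-ones, and the total bias of 2*0x8000 is
// added back per 32-bit result via PADDD. A scalar model of one output lane:
func extAddPairwiseU16Model(w0, w1 uint16) uint32 {
	s0 := int32(int16(w0 ^ 0x8000)) // w0 - 0x8000
	s1 := int32(int16(w1 ^ 0x8000)) // w1 - 0x8000
	return uint32(s0 + s1 + 0x10000)
}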
|
||||
|
||||
func (m *machine) lowerWidenLow(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
|
||||
var sseOp sseOpcode
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
if signed {
|
||||
sseOp = sseOpcodePmovsxbw
|
||||
} else {
|
||||
sseOp = sseOpcodePmovzxbw
|
||||
}
|
||||
case ssa.VecLaneI16x8:
|
||||
if signed {
|
||||
sseOp = sseOpcodePmovsxwd
|
||||
} else {
|
||||
sseOp = sseOpcodePmovzxwd
|
||||
}
|
||||
case ssa.VecLaneI32x4:
|
||||
if signed {
|
||||
sseOp = sseOpcodePmovsxdq
|
||||
} else {
|
||||
sseOp = sseOpcodePmovzxdq
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, xx, m.c.VRegOf(ret)))
|
||||
}
|
||||
|
||||
func (m *machine) lowerWidenHigh(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
|
||||
tmp := m.c.AllocateVReg(ssa.TypeV128)
|
||||
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
m.copyTo(xx.reg(), tmp)
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePalignr, 8, newOperandReg(tmp), tmp))
|
||||
|
||||
var sseOp sseOpcode
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
if signed {
|
||||
sseOp = sseOpcodePmovsxbw
|
||||
} else {
|
||||
sseOp = sseOpcodePmovzxbw
|
||||
}
|
||||
case ssa.VecLaneI16x8:
|
||||
if signed {
|
||||
sseOp = sseOpcodePmovsxwd
|
||||
} else {
|
||||
sseOp = sseOpcodePmovzxwd
|
||||
}
|
||||
case ssa.VecLaneI32x4:
|
||||
if signed {
|
||||
sseOp = sseOpcodePmovsxdq
|
||||
} else {
|
||||
sseOp = sseOpcodePmovzxdq
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, newOperandReg(tmp), m.c.VRegOf(ret)))
|
||||
}
|
||||
|
||||
func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, ret ssa.Value, lane ssa.VecLane) {
|
||||
tmpDst, tmpGp := m.c.AllocateVReg(ssa.TypeV128), m.c.AllocateVReg(ssa.TypeI64)
|
||||
am := newOperandMem(m.lowerToAddressMode(ptr, offset))
|
||||
|
||||
m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst))
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, am, tmpGp))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, 0, newOperandReg(tmpGp), tmpDst))
|
||||
tmpZeroVec := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asZeros(tmpZeroVec))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpZeroVec), tmpDst))
|
||||
case ssa.VecLaneI16x8:
|
||||
m.insert(m.allocateInstr().asMovzxRmR(extModeWQ, am, tmpGp))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 0, newOperandReg(tmpGp), tmpDst))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 1, newOperandReg(tmpGp), tmpDst))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst))
|
||||
case ssa.VecLaneI32x4:
|
||||
m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, am, tmpGp))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, 0, newOperandReg(tmpGp), tmpDst))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst))
|
||||
case ssa.VecLaneI64x2:
|
||||
m.insert(m.allocateInstr().asMov64MR(am, tmpGp))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), tmpDst))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), tmpDst))
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
m.copyTo(tmpDst, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
var f64x2CvtFromIMask = [16]byte{
|
||||
0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
}
|
||||
|
||||
func (m *machine) lowerVFcvtFromInt(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
|
||||
switch lane {
|
||||
case ssa.VecLaneF32x4:
|
||||
if signed {
|
||||
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, xx, m.c.VRegOf(ret)))
|
||||
} else {
|
||||
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
// Copy the value to two temporary registers.
|
||||
tmp := m.copyToTmp(xx.reg())
|
||||
tmp2 := m.copyToTmp(xx.reg())
|
||||
|
||||
// Clear the higher 10 bits of each 32-bit element, keeping the lower 22 bits.
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePslld, newOperandImm32(0xa), tmp))
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0xa), tmp))
|
||||
|
||||
// Subtract tmp (the lower bits) from tmp2, so that tmp2 keeps only the higher bits.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubd, newOperandReg(tmp), tmp2))
|
||||
|
||||
// Convert the lower bits in tmp.
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp))
|
||||
|
||||
// Logically shift tmp2 right by one and convert it, so tmp2 holds half of the conversion result of the higher bits.
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(1), tmp2))
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp2), tmp2))
|
||||
|
||||
// Double the converted, halved higher bits (tmp2 + tmp2).
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp2), tmp2))
|
||||
|
||||
// Get the conversion result by adding tmp (the conversion of the lower bits) into tmp2.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp), tmp2))
|
||||
|
||||
m.copyTo(tmp2, m.c.VRegOf(ret))
|
||||
}
|
||||
case ssa.VecLaneF64x2:
|
||||
if signed {
|
||||
xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2pd, xx, m.c.VRegOf(ret)))
|
||||
} else {
|
||||
maskReg := m.c.AllocateVReg(ssa.TypeV128)
|
||||
maskLabel := m.getOrAllocateConstLabel(&m.constF64x2CvtFromIMaskIndex, f64x2CvtFromIMask[:])
|
||||
// maskReg = [0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))
|
||||
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
|
||||
// Given that we have xx = [d1, d2, d3, d4], this results in
|
||||
// xx = [d1, [0x00, 0x00, 0x30, 0x43], d2, [0x00, 0x00, 0x30, 0x43]]
|
||||
// = [float64(uint32(d1)) + 0x1.0p52, float64(uint32(d2)) + 0x1.0p52]
|
||||
// ^See https://stackoverflow.com/questions/13269523/can-all-32-bit-ints-be-exactly-represented-as-a-double
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeUnpcklps, newOperandReg(maskReg), xx))
|
||||
|
||||
// maskReg = [float64(0x1.0p52), float64(0x1.0p52)]
|
||||
maskLabel = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))
|
||||
|
||||
// Now, we get the result as
|
||||
// xx = [float64(uint32(d1)), float64(uint32(d2))]
|
||||
// because the following equality always satisfies:
|
||||
// float64(0x1.0p52 + float64(uint32(x))) - float64(0x1.0p52 + float64(uint32(y))) = float64(uint32(x)) - float64(uint32(y))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubpd, newOperandReg(maskReg), xx))
|
||||
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
// i32sMaxOnF64x2 holds math.MaxInt32(=2147483647.0) on two f64 lanes.
|
||||
i32sMaxOnF64x2 = [16]byte{
|
||||
0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0)
|
||||
0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0)
|
||||
}
|
||||
|
||||
// i32uMaxOnF64x2 holds math.MaxUint32(=4294967295.0) on two f64 lanes.
|
||||
i32uMaxOnF64x2 = [16]byte{
|
||||
0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0)
|
||||
0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0)
|
||||
}
|
||||
|
||||
// twop52 holds two float64(0x1.0p52) on two f64 lanes. 0x1.0p52 is special in the sense that
|
||||
// with this exponent, the mantissa represents a corresponding uint32 number, and after arithmetic
|
||||
// like addition or subtraction, the resulting floating point holds exactly the same
|
||||
// bit representation of the 32-bit integer in its mantissa.
|
||||
//
|
||||
// Note: the name twop52 is common across various compiler ecosystems.
|
||||
// E.g. https://github.com/llvm/llvm-project/blob/92ab024f81e5b64e258b7c3baaf213c7c26fcf40/compiler-rt/lib/builtins/floatdidf.c#L28
|
||||
// E.g. https://opensource.apple.com/source/clang/clang-425.0.24/src/projects/compiler-rt/lib/floatdidf.c.auto.html
|
||||
twop52 = [16]byte{
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52)
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52)
|
||||
}
|
||||
)
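// Illustrative sketch, not part of the vendored source: placing a uint32 value in the low
// 32 bits of a float64 whose bits encode 2^52 yields exactly 2^52 + value, so subtracting
// 2^52 recovers float64(value) with no rounding; this is what the UNPCKLPS/SUBPD pair in
// lowerVFcvtFromInt does per lane. A scalar model, assuming the standard math package is
// imported:
func u32ToF64ViaTwop52(v uint32) float64 {
	const twop52bits = 0x4330000000000000 // bit pattern of float64(0x1p52), see twop52 above
	bits := twop52bits | uint64(v)        // the merged lane built by UNPCKLPS
	return math.Float64frombits(bits) - 0x1p52
}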
|
||||
|
||||
func (m *machine) lowerVFcvtToIntSat(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
|
||||
switch lane {
|
||||
case ssa.VecLaneF32x4:
|
||||
if signed {
|
||||
tmp := m.copyToTmp(xx)
|
||||
|
||||
// Assuming we have xx = [v1, v2, v3, v4].
|
||||
//
|
||||
// Set all bits if lane is not NaN on tmp.
|
||||
// tmp[i] = 0xffffffff if vi != NaN
|
||||
// = 0 if vi == NaN
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp))
|
||||
|
||||
// Clear NaN lanes on xx, meaning that
|
||||
// xx[i] = vi if vi != NaN
|
||||
// 0 if vi == NaN
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp), xx))
|
||||
|
||||
// tmp[i] = ^vi if vi != NaN
|
||||
// = 0xffffffff if vi == NaN
|
||||
// which means that tmp[i] & 0x80000000 != 0 if and only if vi is negative.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeXorps, newOperandReg(xx), tmp))
|
||||
|
||||
// xx[i] = int32(vi) if vi != NaN and xx is not overflowing.
|
||||
// = 0x80000000 if vi != NaN and xx is overflowing (See https://www.felixcloutier.com/x86/cvttps2dq)
|
||||
// = 0 if vi == NaN
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx))
|
||||
|
||||
// Below, we have to convert 0x80000000 into 0x7FFFFFFF for positive overflowing lane.
|
||||
//
|
||||
// tmp[i] = 0x80000000 if vi is positive
|
||||
// = any satisfying any&0x80000000 = 0 if vi is negative or zero.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(xx), tmp))
|
||||
|
||||
// Arithmetic right shifting tmp by 31, meaning that we have
|
||||
// tmp[i] = 0xffffffff if vi is positive, 0 otherwise.
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrad, newOperandImm32(0x1f), tmp))
|
||||
|
||||
// Flipping 0x80000000 if vi is positive, otherwise keep intact.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), xx))
|
||||
} else {
|
||||
tmp := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asZeros(tmp))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxps, newOperandReg(tmp), xx))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmp), tmp))
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0x1), tmp))
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp))
|
||||
tmp2 := m.copyToTmp(xx)
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubps, newOperandReg(tmp), tmp2))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredLE_OS), newOperandReg(tmp2), tmp))
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(tmp2), tmp2))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp2))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaxsd, newOperandReg(tmp), tmp2))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(tmp2), xx))
|
||||
}
|
||||
|
||||
case ssa.VecLaneF64x2:
|
||||
tmp2 := m.c.AllocateVReg(ssa.TypeV128)
|
||||
if signed {
|
||||
tmp := m.copyToTmp(xx)
|
||||
|
||||
// Set all bits for non-NaN lanes, zeros otherwise.
|
||||
// I.e. tmp[i] = 0xffffffff_ffffffff if vi != NaN, 0 otherwise.
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmppd, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp))
|
||||
|
||||
maskLabel := m.getOrAllocateConstLabel(&m.constI32sMaxOnF64x2Index, i32sMaxOnF64x2[:])
|
||||
// Load 2147483647.0 into each lane of tmp2.
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp2))
|
||||
|
||||
// tmp[i] = 2147483647 if vi != NaN, 0 otherwise.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp2), tmp))
|
||||
|
||||
// MINPD returns the source register's value as-is, so we have
|
||||
// xx[i] = vi if vi != NaN
|
||||
// = 0 if vi == NaN
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp), xx))
|
||||
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttpd2dq, newOperandReg(xx), xx))
|
||||
} else {
|
||||
tmp := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asZeros(tmp))
|
||||
|
||||
// xx[i] = vi if vi != NaN && vi > 0
|
||||
// = 0 if vi == NaN || vi <= 0
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxpd, newOperandReg(tmp), xx))
|
||||
|
||||
// tmp2[i] = float64(math.MaxUint32) = 4294967295.0
|
||||
maskIndex := m.getOrAllocateConstLabel(&m.constI32uMaxOnF64x2Index, i32uMaxOnF64x2[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2))
|
||||
|
||||
// xx[i] = vi if vi != NaN && vi > 0 && vi <= math.MaxUint32
|
||||
// = 0 otherwise
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp2), xx))
|
||||
|
||||
// Round the floating points into integer.
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeRoundpd, 0x3, newOperandReg(xx), xx))
|
||||
|
||||
// tmp2[i] = float64(0x1.0p52)
|
||||
maskIndex = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2))
|
||||
|
||||
// xx[i] = float64(0x1.0p52) + float64(uint32(vi)) if vi != NaN && vi > 0 && vi <= math.MaxUint32
|
||||
// = 0 otherwise
|
||||
//
|
||||
// This means that xx[i] holds exactly the same bit of uint32(vi) in its lower 32-bits.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddpd, newOperandReg(tmp2), xx))
|
||||
|
||||
// At this point, we have
|
||||
// xx = [uint32(v0), float64(0x1.0p52), uint32(v1), float64(0x1.0p52)]
|
||||
// tmp = [0, 0, 0, 0]
|
||||
// as 32x4 lanes. Therefore, SHUFPS with 0b00_00_10_00 results in
|
||||
// xx = [xx[00], xx[10], tmp[00], tmp[00]] = [xx[00], xx[10], 0, 0]
|
||||
// meaning that for i = 0 and 1, we have
|
||||
// xx[i] = uint32(vi) if vi != NaN && vi > 0 && vi <= math.MaxUint32
|
||||
// = 0 otherwise.
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeShufps, 0b00_00_10_00, newOperandReg(tmp), xx))
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
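// Illustrative sketch, not part of the vendored source: CVTTPS2DQ returns 0x80000000 for
// NaN and for out-of-range inputs, so the signed F32x4 sequence above zeroes NaN lanes
// first and then flips 0x80000000 to 0x7fffffff only where the input was positive. A
// scalar model of the saturating behaviour being implemented:
func f32ToI32SatModel(v float32) int32 {
	if v != v { // NaN
		return 0
	}
	if v >= 2147483648 { // positive overflow
		return 0x7fffffff
	}
	if v <= -2147483648 { // negative overflow (the boundary itself is exact)
		return -0x80000000
	}
	return int32(v) // truncation toward zero, as CVTT does
}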
|
||||
|
||||
func (m *machine) lowerNarrow(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) {
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
|
||||
|
||||
var sseOp sseOpcode
|
||||
switch lane {
|
||||
case ssa.VecLaneI16x8:
|
||||
if signed {
|
||||
sseOp = sseOpcodePacksswb
|
||||
} else {
|
||||
sseOp = sseOpcodePackuswb
|
||||
}
|
||||
case ssa.VecLaneI32x4:
|
||||
if signed {
|
||||
sseOp = sseOpcodePackssdw
|
||||
} else {
|
||||
sseOp = sseOpcodePackusdw
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOp, yy, xx))
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
func (m *machine) lowerWideningPairwiseDotProductS(x, y, ret ssa.Value) {
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, yy, xx))
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
func (m *machine) lowerVIabs(instr *ssa.Instruction) {
|
||||
x, lane := instr.ArgWithLane()
|
||||
rd := m.c.VRegOf(instr.Return())
|
||||
|
||||
if lane == ssa.VecLaneI64x2 {
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
|
||||
blendReg := xmm0VReg
|
||||
m.insert(m.allocateInstr().asDefineUninitializedReg(blendReg))
|
||||
|
||||
tmp := m.copyToTmp(_xx.reg())
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
|
||||
// Clear all bits on blendReg.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(blendReg), blendReg))
|
||||
// Subtract xx from blendReg.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubq, newOperandReg(xx), blendReg))
|
||||
// Copy the subtracted value above back into xx.
|
||||
m.copyTo(blendReg, xx)
|
||||
|
||||
m.insert(m.allocateInstr().asBlendvpd(newOperandReg(tmp), xx))
|
||||
|
||||
m.copyTo(xx, rd)
|
||||
} else {
|
||||
var vecOp sseOpcode
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
vecOp = sseOpcodePabsb
|
||||
case ssa.VecLaneI16x8:
|
||||
vecOp = sseOpcodePabsw
|
||||
case ssa.VecLaneI32x4:
|
||||
vecOp = sseOpcodePabsd
|
||||
}
|
||||
rn := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
|
||||
i := m.allocateInstr()
|
||||
i.asXmmUnaryRmR(vecOp, rn, rd)
|
||||
m.insert(i)
|
||||
}
|
||||
}
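// Illustrative sketch, not part of the vendored source: BLENDVPD selects per lane based on
// the sign bit of xmm0, which above holds 0 - x; that sign bit is set exactly when x > 0,
// so the original value is kept for positive lanes and the negation otherwise. A scalar
// model of one lane:
func absI64Model(x int64) int64 {
	neg := 0 - x
	if neg < 0 { // sign bit of the negated value set <=> x > 0 (or x == MinInt64)
		return x
	}
	return neg
}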
|
||||
|
||||
func (m *machine) lowerVIpopcnt(instr *ssa.Instruction) {
|
||||
x := instr.Arg()
|
||||
rn := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
rd := m.c.VRegOf(instr.Return())
|
||||
|
||||
tmp1 := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.lowerVconst(tmp1, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f)
|
||||
|
||||
// Copy input into tmp2.
|
||||
tmp2 := m.copyToTmp(rn.reg())
|
||||
|
||||
// Given that we have:
|
||||
// rm = [b1, ..., b16] where bn = hn:ln and hn and ln are higher and lower 4-bits of bn.
|
||||
//
|
||||
// Take PAND on tmp1 and tmp2, so that we mask out all the higher bits.
|
||||
// tmp2 = [l1, ..., l16].
|
||||
pand := m.allocateInstr()
|
||||
pand.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp2)
|
||||
m.insert(pand)
|
||||
|
||||
// Do logical (packed word) right shift by 4 on rm and PAND against the mask (tmp1); meaning that we have
|
||||
// tmp3 = [h1, ...., h16].
|
||||
tmp3 := m.copyToTmp(rn.reg())
|
||||
psrlw := m.allocateInstr()
|
||||
psrlw.asXmmRmiReg(sseOpcodePsrlw, newOperandImm32(4), tmp3)
|
||||
m.insert(psrlw)
|
||||
|
||||
pand2 := m.allocateInstr()
|
||||
pand2.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp3)
|
||||
m.insert(pand2)
|
||||
|
||||
// Read the popcntTable into tmp4, and we have
|
||||
// tmp4 = [0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04]
|
||||
tmp4 := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.lowerVconst(tmp4, 0x03_02_02_01_02_01_01_00, 0x04_03_03_02_03_02_02_01)
|
||||
|
||||
// Make a copy for later.
|
||||
tmp5 := m.copyToTmp(tmp4)
|
||||
|
||||
// tmp4 = [popcnt(l1), ..., popcnt(l16)].
|
||||
pshufb := m.allocateInstr()
|
||||
pshufb.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp2), tmp4)
|
||||
m.insert(pshufb)
|
||||
|
||||
pshufb2 := m.allocateInstr()
|
||||
pshufb2.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp3), tmp5)
|
||||
m.insert(pshufb2)
|
||||
|
||||
// tmp4 + tmp5 is the result.
|
||||
paddb := m.allocateInstr()
|
||||
paddb.asXmmRmR(sseOpcodePaddb, newOperandReg(tmp4), tmp5)
|
||||
m.insert(paddb)
|
||||
|
||||
m.copyTo(tmp5, rd)
|
||||
}
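// Illustrative sketch, not part of the vendored source: the PSHUFB-based popcount above
// looks up the bit count of each nibble in the 16-entry table loaded into tmp4/tmp5 and
// adds the two halves. A scalar model of one byte:
func popcntByteModel(b byte) byte {
	table := [16]byte{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}
	return table[b&0x0f] + table[b>>4]
}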
|
||||
|
||||
func (m *machine) lowerVImul(instr *ssa.Instruction) {
|
||||
x, y, lane := instr.Arg2WithLane()
|
||||
rd := m.c.VRegOf(instr.Return())
|
||||
if lane == ssa.VecLaneI64x2 {
|
||||
rn := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
rm := m.getOperand_Reg(m.c.ValueDefinition(y))
|
||||
// Assuming that we have
|
||||
// rm = [p1, p2] = [p1_lo, p1_hi, p2_lo, p2_high]
|
||||
// rn = [q1, q2] = [q1_lo, q1_hi, q2_lo, q2_high]
|
||||
// where pN and qN are 64-bit (quad word) lane, and pN_lo, pN_hi, qN_lo and qN_hi are 32-bit (double word) lane.
|
||||
|
||||
// Copy rn into tmp1.
|
||||
tmp1 := m.copyToTmp(rn.reg())
|
||||
|
||||
// And do the logical right shift by 32-bit on tmp1, which makes tmp1 = [0, p1_high, 0, p2_high]
|
||||
shift := m.allocateInstr()
|
||||
shift.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp1)
|
||||
m.insert(shift)
|
||||
|
||||
// Execute "pmuludq rm,tmp1", which makes tmp1 = [p1_high*q1_lo, p2_high*q2_lo] where each lane is 64-bit.
|
||||
mul := m.allocateInstr()
|
||||
mul.asXmmRmR(sseOpcodePmuludq, rm, tmp1)
|
||||
m.insert(mul)
|
||||
|
||||
// Copy rm value into tmp2.
|
||||
tmp2 := m.copyToTmp(rm.reg())
|
||||
|
||||
// And do the logical right shift by 32-bit on tmp2, which makes tmp2 = [0, q1_high, 0, q2_high]
|
||||
shift2 := m.allocateInstr()
|
||||
shift2.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp2)
|
||||
m.insert(shift2)
|
||||
|
||||
// Execute "pmuludq rm,tmp2", which makes tmp2 = [p1_lo*q1_high, p2_lo*q2_high] where each lane is 64-bit.
|
||||
mul2 := m.allocateInstr()
|
||||
mul2.asXmmRmR(sseOpcodePmuludq, rn, tmp2)
|
||||
m.insert(mul2)
|
||||
|
||||
// Adds tmp1 and tmp2 and do the logical left shift by 32-bit,
|
||||
// which makes tmp1 = [(p1_lo*q1_high+p1_high*q1_lo)<<32, (p2_lo*q2_high+p2_high*q2_lo)<<32]
|
||||
add := m.allocateInstr()
|
||||
add.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp2), tmp1)
|
||||
m.insert(add)
|
||||
|
||||
shift3 := m.allocateInstr()
|
||||
shift3.asXmmRmiReg(sseOpcodePsllq, newOperandImm32(32), tmp1)
|
||||
m.insert(shift3)
|
||||
|
||||
// Copy rm value into tmp3.
|
||||
tmp3 := m.copyToTmp(rm.reg())
|
||||
|
||||
// "pmuludq rm,tmp3" makes tmp3 = [p1_lo*q1_lo, p2_lo*q2_lo] where each lane is 64-bit.
|
||||
mul3 := m.allocateInstr()
|
||||
mul3.asXmmRmR(sseOpcodePmuludq, rn, tmp3)
|
||||
m.insert(mul3)
|
||||
|
||||
// Finally, we get the result by computing tmp1 + tmp3,
|
||||
// which makes tmp1 = [(p1_lo*q1_high+p1_high*q1_lo)<<32+p1_lo*q1_lo, (p2_lo*q2_high+p2_high*q2_lo)<<32+p2_lo*q2_lo]
|
||||
add2 := m.allocateInstr()
|
||||
add2.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp3), tmp1)
|
||||
m.insert(add2)
|
||||
|
||||
m.copyTo(tmp1, rd)
|
||||
|
||||
} else {
|
||||
var vecOp sseOpcode
|
||||
switch lane {
|
||||
case ssa.VecLaneI16x8:
|
||||
vecOp = sseOpcodePmullw
|
||||
case ssa.VecLaneI32x4:
|
||||
vecOp = sseOpcodePmulld
|
||||
default:
|
||||
panic("unsupported: " + lane.String())
|
||||
}
|
||||
m.lowerVbBinOp(vecOp, x, y, instr.Return())
|
||||
}
|
||||
}
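// Illustrative sketch, not part of the vendored source: with only a 32x32->64 multiply
// (PMULUDQ) available, the I64x2 product above is assembled from three partial products;
// the high*high term is dropped because it only affects bits above 64. A scalar model:
func mul64Model(p, q uint64) uint64 {
	pLo, pHi := p&0xffffffff, p>>32
	qLo, qHi := q&0xffffffff, q>>32
	cross := (pHi*qLo + pLo*qHi) << 32 // the two PMULUDQ results, added and shifted
	return cross + pLo*qLo             // plus the low*low PMULUDQ
}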
|
346
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go
generated
vendored
Normal file
@ -0,0 +1,346 @@
|
||||
package amd64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
type operand struct {
|
||||
kind operandKind
|
||||
data uint64
|
||||
}
|
||||
|
||||
type operandKind byte
|
||||
|
||||
const (
|
||||
// operandKindReg is an operand which is an integer Register.
|
||||
operandKindReg operandKind = iota + 1
|
||||
|
||||
// operandKindMem is a value in Memory.
|
||||
// 32, 64, or 128 bit value.
|
||||
operandKindMem
|
||||
|
||||
// operandKindImm32 is a signed-32-bit integer immediate value.
|
||||
operandKindImm32
|
||||
|
||||
// operandKindLabel is a label.
|
||||
operandKindLabel
|
||||
)
|
||||
|
||||
// String implements fmt.Stringer.
|
||||
func (o operandKind) String() string {
|
||||
switch o {
|
||||
case operandKindReg:
|
||||
return "reg"
|
||||
case operandKindMem:
|
||||
return "mem"
|
||||
case operandKindImm32:
|
||||
return "imm32"
|
||||
case operandKindLabel:
|
||||
return "label"
|
||||
default:
|
||||
panic("BUG: invalid operand kind")
|
||||
}
|
||||
}
|
||||
|
||||
// format returns the string representation of the operand.
|
||||
// _64 is only for the case where the operand is a register, and it's integer.
|
||||
func (o *operand) format(_64 bool) string {
|
||||
switch o.kind {
|
||||
case operandKindReg:
|
||||
return formatVRegSized(o.reg(), _64)
|
||||
case operandKindMem:
|
||||
return o.addressMode().String()
|
||||
case operandKindImm32:
|
||||
return fmt.Sprintf("$%d", int32(o.imm32()))
|
||||
case operandKindLabel:
|
||||
return backend.Label(o.imm32()).String()
|
||||
default:
|
||||
panic(fmt.Sprintf("BUG: invalid operand: %s", o.kind))
|
||||
}
|
||||
}
|
||||
|
||||
//go:inline
|
||||
func (o *operand) reg() regalloc.VReg {
|
||||
return regalloc.VReg(o.data)
|
||||
}
|
||||
|
||||
//go:inline
|
||||
func (o *operand) setReg(r regalloc.VReg) {
|
||||
o.data = uint64(r)
|
||||
}
|
||||
|
||||
//go:inline
|
||||
func (o *operand) addressMode() *amode {
|
||||
return wazevoapi.PtrFromUintptr[amode](uintptr(o.data))
|
||||
}
|
||||
|
||||
//go:inline
|
||||
func (o *operand) imm32() uint32 {
|
||||
return uint32(o.data)
|
||||
}
|
||||
|
||||
func (o *operand) label() backend.Label {
|
||||
switch o.kind {
|
||||
case operandKindLabel:
|
||||
return backend.Label(o.data)
|
||||
case operandKindMem:
|
||||
mem := o.addressMode()
|
||||
if mem.kind() != amodeRipRel {
|
||||
panic("BUG: invalid label")
|
||||
}
|
||||
return backend.Label(mem.imm32)
|
||||
default:
|
||||
panic("BUG: invalid operand kind")
|
||||
}
|
||||
}
|
||||
|
||||
func newOperandLabel(label backend.Label) operand {
|
||||
return operand{kind: operandKindLabel, data: uint64(label)}
|
||||
}
|
||||
|
||||
func newOperandReg(r regalloc.VReg) operand {
|
||||
return operand{kind: operandKindReg, data: uint64(r)}
|
||||
}
|
||||
|
||||
func newOperandImm32(imm32 uint32) operand {
|
||||
return operand{kind: operandKindImm32, data: uint64(imm32)}
|
||||
}
|
||||
|
||||
func newOperandMem(amode *amode) operand {
|
||||
return operand{kind: operandKindMem, data: uint64(uintptr(unsafe.Pointer(amode)))}
|
||||
}
|
||||
|
||||
// amode is a memory operand (addressing mode).
|
||||
type amode struct {
|
||||
kindWithShift uint32
|
||||
imm32 uint32
|
||||
base regalloc.VReg
|
||||
|
||||
// For amodeRegRegShift:
|
||||
index regalloc.VReg
|
||||
}
|
||||
|
||||
type amodeKind byte
|
||||
|
||||
const (
|
||||
// amodeImmReg calculates sign-extend-32-to-64(Immediate) + base
|
||||
amodeImmReg amodeKind = iota + 1
|
||||
|
||||
// amodeImmRBP is the same as amodeImmReg, but the base register is fixed to RBP.
|
||||
// The only difference is that it doesn't tell the register allocator to use RBP, which would be distracting for the
|
||||
// register allocator.
|
||||
amodeImmRBP
|
||||
|
||||
// amodeRegRegShift calculates sign-extend-32-to-64(Immediate) + base + (Register2 << Shift)
|
||||
amodeRegRegShift
|
||||
|
||||
// amodeRipRel is a RIP-relative addressing mode specified by the label.
|
||||
amodeRipRel
|
||||
|
||||
// TODO: there are other addressing modes such as the one without base register.
|
||||
)
|
||||
|
||||
func (a *amode) kind() amodeKind {
|
||||
return amodeKind(a.kindWithShift & 0xff)
|
||||
}
|
||||
|
||||
func (a *amode) shift() byte {
|
||||
return byte(a.kindWithShift >> 8)
|
||||
}
|
||||
|
||||
func (a *amode) uses(rs *[]regalloc.VReg) {
|
||||
switch a.kind() {
|
||||
case amodeImmReg:
|
||||
*rs = append(*rs, a.base)
|
||||
case amodeRegRegShift:
|
||||
*rs = append(*rs, a.base, a.index)
|
||||
case amodeImmRBP, amodeRipRel:
|
||||
default:
|
||||
panic("BUG: invalid amode kind")
|
||||
}
|
||||
}
|
||||
|
||||
func (a *amode) nregs() int {
|
||||
switch a.kind() {
|
||||
case amodeImmReg:
|
||||
return 1
|
||||
case amodeRegRegShift:
|
||||
return 2
|
||||
case amodeImmRBP, amodeRipRel:
|
||||
return 0
|
||||
default:
|
||||
panic("BUG: invalid amode kind")
|
||||
}
|
||||
}
|
||||
|
||||
func (a *amode) assignUses(i int, reg regalloc.VReg) {
|
||||
switch a.kind() {
|
||||
case amodeImmReg:
|
||||
if i == 0 {
|
||||
a.base = reg
|
||||
} else {
|
||||
panic("BUG: invalid amode assignment")
|
||||
}
|
||||
case amodeRegRegShift:
|
||||
if i == 0 {
|
||||
a.base = reg
|
||||
} else if i == 1 {
|
||||
a.index = reg
|
||||
} else {
|
||||
panic("BUG: invalid amode assignment")
|
||||
}
|
||||
default:
|
||||
panic("BUG: invalid amode assignment")
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) newAmodeImmReg(imm32 uint32, base regalloc.VReg) *amode {
|
||||
ret := m.amodePool.Allocate()
|
||||
*ret = amode{kindWithShift: uint32(amodeImmReg), imm32: imm32, base: base}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (m *machine) newAmodeImmRBPReg(imm32 uint32) *amode {
|
||||
ret := m.amodePool.Allocate()
|
||||
*ret = amode{kindWithShift: uint32(amodeImmRBP), imm32: imm32, base: rbpVReg}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (m *machine) newAmodeRegRegShift(imm32 uint32, base, index regalloc.VReg, shift byte) *amode {
|
||||
if shift > 3 {
|
||||
panic(fmt.Sprintf("BUG: invalid shift (must be 3>=): %d", shift))
|
||||
}
|
||||
ret := m.amodePool.Allocate()
|
||||
*ret = amode{kindWithShift: uint32(amodeRegRegShift) | uint32(shift)<<8, imm32: imm32, base: base, index: index}
|
||||
return ret
|
||||
}
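// Illustrative usage sketch, not part of the vendored source: with hypothetical base and
// index virtual registers b and i,
//
//	m.newAmodeRegRegShift(8, b, i, 2)
//
// describes the address b + (i << 2) + 8, which String below renders as "8(%b,%i,4)".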
|
||||
|
||||
func (m *machine) newAmodeRipRel(label backend.Label) *amode {
|
||||
ret := m.amodePool.Allocate()
|
||||
*ret = amode{kindWithShift: uint32(amodeRipRel), imm32: uint32(label)}
|
||||
return ret
|
||||
}
|
||||
|
||||
// String implements fmt.Stringer.
|
||||
func (a *amode) String() string {
|
||||
switch a.kind() {
|
||||
case amodeImmReg, amodeImmRBP:
|
||||
if a.imm32 == 0 {
|
||||
return fmt.Sprintf("(%s)", formatVRegSized(a.base, true))
|
||||
}
|
||||
return fmt.Sprintf("%d(%s)", int32(a.imm32), formatVRegSized(a.base, true))
|
||||
case amodeRegRegShift:
|
||||
shift := 1 << a.shift()
|
||||
if a.imm32 == 0 {
|
||||
return fmt.Sprintf(
|
||||
"(%s,%s,%d)",
|
||||
formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift)
|
||||
}
|
||||
return fmt.Sprintf(
|
||||
"%d(%s,%s,%d)",
|
||||
int32(a.imm32), formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift)
|
||||
case amodeRipRel:
|
||||
return fmt.Sprintf("%s(%%rip)", backend.Label(a.imm32))
|
||||
default:
|
||||
panic("BUG: invalid amode kind")
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) getOperand_Mem_Reg(def *backend.SSAValueDefinition) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return newOperandReg(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
if def.SSAValue().Type() == ssa.TypeV128 {
|
||||
// SIMD instructions require strict memory alignment, so we don't support the memory operand for V128 at the moment.
|
||||
return m.getOperand_Reg(def)
|
||||
}
|
||||
|
||||
if m.c.MatchInstr(def, ssa.OpcodeLoad) {
|
||||
instr := def.Instr
|
||||
ptr, offset, _ := instr.LoadData()
|
||||
op = newOperandMem(m.lowerToAddressMode(ptr, offset))
|
||||
instr.MarkLowered()
|
||||
return op
|
||||
}
|
||||
return m.getOperand_Reg(def)
|
||||
}
|
||||
|
||||
func (m *machine) getOperand_Mem_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return newOperandReg(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
if m.c.MatchInstr(def, ssa.OpcodeLoad) {
|
||||
instr := def.Instr
|
||||
ptr, offset, _ := instr.LoadData()
|
||||
op = newOperandMem(m.lowerToAddressMode(ptr, offset))
|
||||
instr.MarkLowered()
|
||||
return op
|
||||
}
|
||||
return m.getOperand_Imm32_Reg(def)
|
||||
}
|
||||
|
||||
func (m *machine) getOperand_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return newOperandReg(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
instr := def.Instr
|
||||
if instr.Constant() {
|
||||
// If the operation is 64-bit, x64 sign-extends the 32-bit immediate value.
|
||||
// Therefore, we need to check if the immediate value is within the 32-bit range and if the sign bit is set,
|
||||
// we should not use the immediate value.
|
||||
if op, ok := asImm32Operand(instr.ConstantVal(), instr.Return().Type() == ssa.TypeI32); ok {
|
||||
instr.MarkLowered()
|
||||
return op
|
||||
}
|
||||
}
|
||||
return m.getOperand_Reg(def)
|
||||
}
|
||||
|
||||
func asImm32Operand(val uint64, allowSignExt bool) (operand, bool) {
|
||||
if imm32, ok := asImm32(val, allowSignExt); ok {
|
||||
return newOperandImm32(imm32), true
|
||||
}
|
||||
return operand{}, false
|
||||
}
|
||||
|
||||
func asImm32(val uint64, allowSignExt bool) (uint32, bool) {
|
||||
u32val := uint32(val)
|
||||
if uint64(u32val) != val {
|
||||
return 0, false
|
||||
}
|
||||
if !allowSignExt && u32val&0x80000000 != 0 {
|
||||
return 0, false
|
||||
}
|
||||
return u32val, true
|
||||
}
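// Illustrative usage sketch, not part of the vendored source: a 64-bit ALU operation would
// sign-extend an imm32 whose top bit is set, changing the value, so such constants are
// only accepted when the operation is 32-bit. For example:
//
//	asImm32(0x80000000, true)     // (0x80000000, true): 32-bit op, usable as-is
//	asImm32(0x80000000, false)    // (0, false): would sign-extend to 0xffffffff80000000
//	asImm32(0x1_0000_0000, false) // (0, false): does not fit in 32 bits at all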
|
||||
|
||||
func (m *machine) getOperand_Reg(def *backend.SSAValueDefinition) (op operand) {
|
||||
var v regalloc.VReg
|
||||
if def.IsFromBlockParam() {
|
||||
v = def.BlkParamVReg
|
||||
} else {
|
||||
instr := def.Instr
|
||||
if instr.Constant() {
|
||||
// We inline all the constant instructions so that we can reduce register usage.
|
||||
v = m.lowerConstant(instr)
|
||||
instr.MarkLowered()
|
||||
} else {
|
||||
if n := def.N; n == 0 {
|
||||
v = m.c.VRegOf(instr.Return())
|
||||
} else {
|
||||
_, rs := instr.Returns()
|
||||
v = m.c.VRegOf(rs[n-1])
|
||||
}
|
||||
}
|
||||
}
|
||||
return newOperandReg(v)
|
||||
}
|
11
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go
generated
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
//go:build !tinygo
|
||||
|
||||
package amd64
|
||||
|
||||
import "reflect"
|
||||
|
||||
// setSliceLimits sets both Cap and Len for the given reflected slice.
|
||||
func setSliceLimits(s *reflect.SliceHeader, limit uintptr) {
|
||||
s.Len = int(limit)
|
||||
s.Cap = int(limit)
|
||||
}
|
11
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go
generated
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
//go:build tinygo
|
||||
|
||||
package amd64
|
||||
|
||||
import "reflect"
|
||||
|
||||
// setSliceLimits sets both Cap and Len for the given reflected slice.
|
||||
func setSliceLimits(s *reflect.SliceHeader, limit uintptr) {
|
||||
s.Len = limit
|
||||
s.Cap = limit
|
||||
}
|
181
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reg.go
generated
vendored
Normal file
@@ -0,0 +1,181 @@
package amd64

import (
	"fmt"

	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
)

// Amd64-specific registers.
const (
	// rax is a gp register.
	rax = regalloc.RealRegInvalid + 1 + iota
	// rcx is a gp register.
	rcx
	// rdx is a gp register.
	rdx
	// rbx is a gp register.
	rbx
	// rsp is a gp register.
	rsp
	// rbp is a gp register.
	rbp
	// rsi is a gp register.
	rsi
	// rdi is a gp register.
	rdi
	// r8 is a gp register.
	r8
	// r9 is a gp register.
	r9
	// r10 is a gp register.
	r10
	// r11 is a gp register.
	r11
	// r12 is a gp register.
	r12
	// r13 is a gp register.
	r13
	// r14 is a gp register.
	r14
	// r15 is a gp register.
	r15

	// xmm0 is a vector register.
	xmm0
	// xmm1 is a vector register.
	xmm1
	// xmm2 is a vector register.
	xmm2
	// xmm3 is a vector register.
	xmm3
	// xmm4 is a vector register.
	xmm4
	// xmm5 is a vector register.
	xmm5
	// xmm6 is a vector register.
	xmm6
	// xmm7 is a vector register.
	xmm7
	// xmm8 is a vector register.
	xmm8
	// xmm9 is a vector register.
	xmm9
	// xmm10 is a vector register.
	xmm10
	// xmm11 is a vector register.
	xmm11
	// xmm12 is a vector register.
	xmm12
	// xmm13 is a vector register.
	xmm13
	// xmm14 is a vector register.
	xmm14
	// xmm15 is a vector register.
	xmm15
)

var (
	raxVReg = regalloc.FromRealReg(rax, regalloc.RegTypeInt)
	rcxVReg = regalloc.FromRealReg(rcx, regalloc.RegTypeInt)
	rdxVReg = regalloc.FromRealReg(rdx, regalloc.RegTypeInt)
	rbxVReg = regalloc.FromRealReg(rbx, regalloc.RegTypeInt)
	rspVReg = regalloc.FromRealReg(rsp, regalloc.RegTypeInt)
	rbpVReg = regalloc.FromRealReg(rbp, regalloc.RegTypeInt)
	rsiVReg = regalloc.FromRealReg(rsi, regalloc.RegTypeInt)
	rdiVReg = regalloc.FromRealReg(rdi, regalloc.RegTypeInt)
	r8VReg  = regalloc.FromRealReg(r8, regalloc.RegTypeInt)
	r9VReg  = regalloc.FromRealReg(r9, regalloc.RegTypeInt)
	r10VReg = regalloc.FromRealReg(r10, regalloc.RegTypeInt)
	r11VReg = regalloc.FromRealReg(r11, regalloc.RegTypeInt)
	r12VReg = regalloc.FromRealReg(r12, regalloc.RegTypeInt)
	r13VReg = regalloc.FromRealReg(r13, regalloc.RegTypeInt)
	r14VReg = regalloc.FromRealReg(r14, regalloc.RegTypeInt)
	r15VReg = regalloc.FromRealReg(r15, regalloc.RegTypeInt)

	xmm0VReg  = regalloc.FromRealReg(xmm0, regalloc.RegTypeFloat)
	xmm1VReg  = regalloc.FromRealReg(xmm1, regalloc.RegTypeFloat)
	xmm2VReg  = regalloc.FromRealReg(xmm2, regalloc.RegTypeFloat)
	xmm3VReg  = regalloc.FromRealReg(xmm3, regalloc.RegTypeFloat)
	xmm4VReg  = regalloc.FromRealReg(xmm4, regalloc.RegTypeFloat)
	xmm5VReg  = regalloc.FromRealReg(xmm5, regalloc.RegTypeFloat)
	xmm6VReg  = regalloc.FromRealReg(xmm6, regalloc.RegTypeFloat)
	xmm7VReg  = regalloc.FromRealReg(xmm7, regalloc.RegTypeFloat)
	xmm8VReg  = regalloc.FromRealReg(xmm8, regalloc.RegTypeFloat)
	xmm9VReg  = regalloc.FromRealReg(xmm9, regalloc.RegTypeFloat)
	xmm10VReg = regalloc.FromRealReg(xmm10, regalloc.RegTypeFloat)
	xmm11VReg = regalloc.FromRealReg(xmm11, regalloc.RegTypeFloat)
	xmm12VReg = regalloc.FromRealReg(xmm12, regalloc.RegTypeFloat)
	xmm13VReg = regalloc.FromRealReg(xmm13, regalloc.RegTypeFloat)
	xmm14VReg = regalloc.FromRealReg(xmm14, regalloc.RegTypeFloat)
	xmm15VReg = regalloc.FromRealReg(xmm15, regalloc.RegTypeFloat)
)

var regNames = [...]string{
	rax:   "rax",
	rcx:   "rcx",
	rdx:   "rdx",
	rbx:   "rbx",
	rsp:   "rsp",
	rbp:   "rbp",
	rsi:   "rsi",
	rdi:   "rdi",
	r8:    "r8",
	r9:    "r9",
	r10:   "r10",
	r11:   "r11",
	r12:   "r12",
	r13:   "r13",
	r14:   "r14",
	r15:   "r15",
	xmm0:  "xmm0",
	xmm1:  "xmm1",
	xmm2:  "xmm2",
	xmm3:  "xmm3",
	xmm4:  "xmm4",
	xmm5:  "xmm5",
	xmm6:  "xmm6",
	xmm7:  "xmm7",
	xmm8:  "xmm8",
	xmm9:  "xmm9",
	xmm10: "xmm10",
	xmm11: "xmm11",
	xmm12: "xmm12",
	xmm13: "xmm13",
	xmm14: "xmm14",
	xmm15: "xmm15",
}

func formatVRegSized(r regalloc.VReg, _64 bool) string {
	if r.IsRealReg() {
		if r.RegType() == regalloc.RegTypeInt {
			rr := r.RealReg()
			orig := regNames[rr]
			if rr <= rdi {
				if _64 {
					return "%" + orig
				} else {
					return "%e" + orig[1:]
				}
			} else {
				if _64 {
					return "%" + orig
				} else {
					return "%" + orig + "d"
				}
			}
		} else {
			return "%" + regNames[r.RealReg()]
		}
	} else {
		if r.RegType() == regalloc.RegTypeInt {
			if _64 {
				return fmt.Sprintf("%%r%d?", r.ID())
			} else {
				return fmt.Sprintf("%%r%dd?", r.ID())
			}
		} else {
			return fmt.Sprintf("%%xmm%d?", r.ID())
		}
	}
}
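formatVRegSized follows AT&T-style naming: a real 64-bit integer register prints as %rax, its 32-bit form as %eax (or %r8d for the extended registers), and virtual registers carry a trailing "?". A hypothetical, minimal in-package test sketch (not from the vendored source) illustrating that convention:

package amd64

import "testing"

// TestFormatVRegSized is an illustrative sketch, not part of the vendored code.
func TestFormatVRegSized(t *testing.T) {
	if got := formatVRegSized(raxVReg, true); got != "%rax" {
		t.Errorf("64-bit rax: got %s", got)
	}
	if got := formatVRegSized(raxVReg, false); got != "%eax" {
		t.Errorf("32-bit rax: got %s", got)
	}
	if got := formatVRegSized(r8VReg, false); got != "%r8d" {
		t.Errorf("32-bit r8: got %s", got)
	}
	if got := formatVRegSized(xmm3VReg, true); got != "%xmm3" {
		t.Errorf("xmm3: got %s", got)
	}
}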
128 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go generated vendored Normal file
@@ -0,0 +1,128 @@
package amd64

import (
	"encoding/binary"
	"reflect"
	"unsafe"

	"github.com/tetratelabs/wazero/internal/wasmdebug"
)

func stackView(rbp, top uintptr) []byte {
	var stackBuf []byte
	{
		// TODO: use unsafe.Slice after floor version is set to Go 1.20.
		hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf))
		hdr.Data = rbp
		setSliceLimits(hdr, top-rbp)
	}
	return stackBuf
}

// UnwindStack implements wazevo.unwindStack.
func UnwindStack(_, rbp, top uintptr, returnAddresses []uintptr) []uintptr {
	stackBuf := stackView(rbp, top)

	for i := uint64(0); i < uint64(len(stackBuf)); {
		//          (high address)
		//       +-----------------+
		//       |     .......     |
		//       |      ret Y      |
		//       |     .......     |
		//       |      ret 0      |
		//       |      arg X      |
		//       |     .......     |
		//       |      arg 1      |
		//       |      arg 0      |
		//       |  ReturnAddress  |
		//       |   Caller_RBP    |
		//       +-----------------+ <---- Caller_RBP
		//       |   ...........   |
		//       |   clobbered M   |
		//       |   ............  |
		//       |   clobbered 0   |
		//       |   spill slot N  |
		//       |   ............  |
		//       |   spill slot 0  |
		//       |  ReturnAddress  |
		//       |   Caller_RBP    |
		//       +-----------------+ <---- RBP
		//          (low address)

		callerRBP := binary.LittleEndian.Uint64(stackBuf[i:])
		retAddr := binary.LittleEndian.Uint64(stackBuf[i+8:])
		returnAddresses = append(returnAddresses, uintptr(retAddr))
		i = callerRBP - uint64(rbp)
		if len(returnAddresses) == wasmdebug.MaxFrames {
			break
		}
	}
	return returnAddresses
}

// GoCallStackView implements wazevo.goCallStackView.
func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
	//                  (high address)
	//              +-----------------+ <----+
	//              |   xxxxxxxxxxx   |      | ;; optional unused space to make it 16-byte aligned.
	//           ^  |  arg[N]/ret[M]  |      |
	// sliceSize |  |  ............   |      | SizeInBytes/8
	//           |  |  arg[1]/ret[1]  |      |
	//           v  |  arg[0]/ret[0]  | <----+
	//              |   SizeInBytes   |
	//              +-----------------+ <---- stackPointerBeforeGoCall
	//                  (low address)
	data := unsafe.Pointer(uintptr(unsafe.Pointer(stackPointerBeforeGoCall)) + 8)
	size := *stackPointerBeforeGoCall / 8
	return unsafe.Slice((*uint64)(data), int(size))
}

func AdjustClonedStack(oldRsp, oldTop, rsp, rbp, top uintptr) {
	diff := uint64(rsp - oldRsp)

	newBuf := stackView(rbp, top)
	for i := uint64(0); i < uint64(len(newBuf)); {
		//          (high address)
		//       +-----------------+
		//       |     .......     |
		//       |      ret Y      |
		//       |     .......     |
		//       |      ret 0      |
		//       |      arg X      |
		//       |     .......     |
		//       |      arg 1      |
		//       |      arg 0      |
		//       |  ReturnAddress  |
		//       |   Caller_RBP    |
		//       +-----------------+ <---- Caller_RBP
		//       |   ...........   |
		//       |   clobbered M   |
		//       |   ............  |
		//       |   clobbered 0   |
		//       |   spill slot N  |
		//       |   ............  |
		//       |   spill slot 0  |
		//       |  ReturnAddress  |
		//       |   Caller_RBP    |
		//       +-----------------+ <---- RBP
		//          (low address)

		callerRBP := binary.LittleEndian.Uint64(newBuf[i:])
		if callerRBP == 0 {
			// End of stack.
			break
		}
		if i64 := int64(callerRBP); i64 < int64(oldRsp) || i64 >= int64(oldTop) {
			panic("BUG: callerRBP is out of range")
		}
		if int(callerRBP) < 0 {
			panic("BUG: callerRBP is negative")
		}
		adjustedCallerRBP := callerRBP + diff
		if int(adjustedCallerRBP) < 0 {
			panic("BUG: adjustedCallerRBP is negative")
		}
		binary.LittleEndian.PutUint64(newBuf[i:], adjustedCallerRBP)
		i = adjustedCallerRBP - uint64(rbp)
	}
}
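UnwindStack above simply walks the saved-RBP chain: each frame keeps the caller's RBP at [RBP] and the return address at [RBP+8]. A standalone, hypothetical sketch of the same idea in plain Go (not the wazero API), assuming a little-endian byte view of the stack:

package main

import (
	"encoding/binary"
	"fmt"
)

// walkFrames follows a saved-frame-pointer chain laid out in buf, where buf[0]
// corresponds to the address `base`. Each frame holds the caller's frame
// pointer at offset 0 and the return address at offset 8.
func walkFrames(buf []byte, base uint64, maxFrames int) []uint64 {
	var rets []uint64
	for off := uint64(0); off+16 <= uint64(len(buf)) && len(rets) < maxFrames; {
		callerFP := binary.LittleEndian.Uint64(buf[off:])
		rets = append(rets, binary.LittleEndian.Uint64(buf[off+8:]))
		if callerFP <= base { // end of chain (or corrupt frame pointer)
			break
		}
		off = callerFP - base
	}
	return rets
}

func main() {
	// Two fake frames: the frame at 0x1000 points to a caller frame at 0x1020.
	buf := make([]byte, 0x40)
	binary.LittleEndian.PutUint64(buf[0x00:], 0x1020) // caller FP
	binary.LittleEndian.PutUint64(buf[0x08:], 0xAAAA) // return address 0
	binary.LittleEndian.PutUint64(buf[0x20:], 0x0000) // end of chain
	binary.LittleEndian.PutUint64(buf[0x28:], 0xBBBB) // return address 1
	fmt.Printf("%#x\n", walkFrames(buf, 0x1000, 16))
}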
332 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go generated vendored Normal file
@@ -0,0 +1,332 @@
package arm64

import (
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// References:
// * https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#arm64-architecture
// * https://developer.arm.com/documentation/102374/0101/Procedure-Call-Standard

var (
	intParamResultRegs   = []regalloc.RealReg{x0, x1, x2, x3, x4, x5, x6, x7}
	floatParamResultRegs = []regalloc.RealReg{v0, v1, v2, v3, v4, v5, v6, v7}
)

var regInfo = &regalloc.RegisterInfo{
	AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
		// We don't allocate:
		// - x18: Reserved by macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers
		// - x28: Reserved by the Go runtime.
		// - x27 (=tmpReg): because of the reason described on tmpReg.
		regalloc.RegTypeInt: {
			x8, x9, x10, x11, x12, x13, x14, x15,
			x16, x17, x19, x20, x21, x22, x23, x24, x25,
			x26, x29, x30,
			// These are the argument/return registers. Less preferred in the allocation.
			x7, x6, x5, x4, x3, x2, x1, x0,
		},
		regalloc.RegTypeFloat: {
			v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
			v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
			// These are the argument/return registers. Less preferred in the allocation.
			v7, v6, v5, v4, v3, v2, v1, v0,
		},
	},
	CalleeSavedRegisters: regalloc.NewRegSet(
		x19, x20, x21, x22, x23, x24, x25, x26, x28,
		v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31,
	),
	CallerSavedRegisters: regalloc.NewRegSet(
		x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x29, x30,
		v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17,
	),
	RealRegToVReg: []regalloc.VReg{
		x0: x0VReg, x1: x1VReg, x2: x2VReg, x3: x3VReg, x4: x4VReg, x5: x5VReg, x6: x6VReg, x7: x7VReg, x8: x8VReg, x9: x9VReg, x10: x10VReg, x11: x11VReg, x12: x12VReg, x13: x13VReg, x14: x14VReg, x15: x15VReg, x16: x16VReg, x17: x17VReg, x18: x18VReg, x19: x19VReg, x20: x20VReg, x21: x21VReg, x22: x22VReg, x23: x23VReg, x24: x24VReg, x25: x25VReg, x26: x26VReg, x27: x27VReg, x28: x28VReg, x29: x29VReg, x30: x30VReg,
		v0: v0VReg, v1: v1VReg, v2: v2VReg, v3: v3VReg, v4: v4VReg, v5: v5VReg, v6: v6VReg, v7: v7VReg, v8: v8VReg, v9: v9VReg, v10: v10VReg, v11: v11VReg, v12: v12VReg, v13: v13VReg, v14: v14VReg, v15: v15VReg, v16: v16VReg, v17: v17VReg, v18: v18VReg, v19: v19VReg, v20: v20VReg, v21: v21VReg, v22: v22VReg, v23: v23VReg, v24: v24VReg, v25: v25VReg, v26: v26VReg, v27: v27VReg, v28: v28VReg, v29: v29VReg, v30: v30VReg, v31: v31VReg,
	},
	RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
	RealRegType: func(r regalloc.RealReg) regalloc.RegType {
		if r < v0 {
			return regalloc.RegTypeInt
		}
		return regalloc.RegTypeFloat
	},
}

// ArgsResultsRegs implements backend.Machine.
func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) {
	return intParamResultRegs, floatParamResultRegs
}

// LowerParams implements backend.FunctionABI.
func (m *machine) LowerParams(args []ssa.Value) {
	a := m.currentABI

	for i, ssaArg := range args {
		if !ssaArg.Valid() {
			continue
		}
		reg := m.compiler.VRegOf(ssaArg)
		arg := &a.Args[i]
		if arg.Kind == backend.ABIArgKindReg {
			m.InsertMove(reg, arg.Reg, arg.Type)
		} else {
			// TODO: we could use a pair load if there are consecutive loads for the same type.
			//
			//                (high address)
			//             +-----------------+
			//             |     .......     |
			//             |      ret Y      |
			//             |     .......     |
			//             |      ret 0      |
			//             |      arg X      |
			//             |     .......     |
			//             |      arg 1      |
			//             |      arg 0      |    <-|
			//             |  ReturnAddress  |      |
			//             +-----------------+      |
			//             |   ...........   |      |
			//             |   clobbered M   |      | argStackOffset: is unknown at this point of compilation.
			//             |   ............  |      |
			//             |   clobbered 0   |      |
			//             |   spill slot N  |      |
			//             |   ...........   |      |
			//             |   spill slot 0  |      |
			//     SP----> +-----------------+    <-+
			//                (low address)

			bits := arg.Type.Bits()
			// At this point of compilation, we don't yet know how much space exists below the return address.
			// So we instruct the address mode to add the `argStackOffset` to the offset at a later phase of compilation.
			amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
			load := m.allocateInstr()
			switch arg.Type {
			case ssa.TypeI32, ssa.TypeI64:
				load.asULoad(operandNR(reg), amode, bits)
			case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
				load.asFpuLoad(operandNR(reg), amode, bits)
			default:
				panic("BUG")
			}
			m.insert(load)
			m.unresolvedAddressModes = append(m.unresolvedAddressModes, load)
		}
	}
}

// LowerReturns lowers the given returns.
func (m *machine) LowerReturns(rets []ssa.Value) {
	a := m.currentABI

	l := len(rets) - 1
	for i := range rets {
		// Process in reverse order to avoid overwriting the stack returns that already live in the return registers.
		ret := rets[l-i]
		r := &a.Rets[l-i]
		reg := m.compiler.VRegOf(ret)
		if def := m.compiler.ValueDefinition(ret); def.IsFromInstr() {
			// Constant instructions are inlined.
			if inst := def.Instr; inst.Constant() {
				val := inst.Return()
				valType := val.Type()
				v := inst.ConstantVal()
				m.insertLoadConstant(v, valType, reg)
			}
		}
		if r.Kind == backend.ABIArgKindReg {
			m.InsertMove(r.Reg, reg, ret.Type())
		} else {
			// TODO: we could use a pair store if there are consecutive stores for the same type.
			//
			//                (high address)
			//             +-----------------+
			//             |     .......     |
			//             |      ret Y      |
			//             |     .......     |
			//             |      ret 0      |    <-+
			//             |      arg X      |      |
			//             |     .......     |      |
			//             |      arg 1      |      |
			//             |      arg 0      |      |
			//             |  ReturnAddress  |      |
			//             +-----------------+      |
			//             |   ...........   |      |
			//             |   spill slot M  |      | retStackOffset: is unknown at this point of compilation.
			//             |   ............  |      |
			//             |   spill slot 2  |      |
			//             |   spill slot 1  |      |
			//             |   clobbered 0   |      |
			//             |   clobbered 1   |      |
			//             |   ...........   |      |
			//             |   clobbered N   |      |
			//     SP----> +-----------------+    <-+
			//                (low address)

			bits := r.Type.Bits()

			// At this point of compilation, we don't yet know how much space exists below the return address.
			// So we instruct the address mode to add the `retStackOffset` to the offset at a later phase of compilation.
			amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
			store := m.allocateInstr()
			store.asStore(operandNR(reg), amode, bits)
			m.insert(store)
			m.unresolvedAddressModes = append(m.unresolvedAddressModes, store)
		}
	}
}

// callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the
// caller side of the function call.
func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) {
	arg := &a.Args[argIndex]
	if def != nil && def.IsFromInstr() {
		// Constant instructions are inlined.
		if inst := def.Instr; inst.Constant() {
			val := inst.Return()
			valType := val.Type()
			v := inst.ConstantVal()
			m.insertLoadConstant(v, valType, reg)
		}
	}
	if arg.Kind == backend.ABIArgKindReg {
		m.InsertMove(arg.Reg, reg, arg.Type)
	} else {
		// TODO: we could use a pair store if there are consecutive stores for the same type.
		//
		// Note that at this point, the stack pointer is already adjusted.
		bits := arg.Type.Bits()
		amode := m.resolveAddressModeForOffset(arg.Offset-slotBegin, bits, spVReg, false)
		store := m.allocateInstr()
		store.asStore(operandNR(reg), amode, bits)
		m.insert(store)
	}
}

func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex int, reg regalloc.VReg, slotBegin int64) {
	r := &a.Rets[retIndex]
	if r.Kind == backend.ABIArgKindReg {
		m.InsertMove(reg, r.Reg, r.Type)
	} else {
		// TODO: we could use a pair load if there are consecutive loads for the same type.
		amode := m.resolveAddressModeForOffset(a.ArgStackSize+r.Offset-slotBegin, r.Type.Bits(), spVReg, false)
		ldr := m.allocateInstr()
		switch r.Type {
		case ssa.TypeI32, ssa.TypeI64:
			ldr.asULoad(operandNR(reg), amode, r.Type.Bits())
		case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
			ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits())
		default:
			panic("BUG")
		}
		m.insert(ldr)
	}
}

func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) {
	exct := m.executableContext
	exct.PendingInstructions = exct.PendingInstructions[:0]
	mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse)
	for _, instr := range exct.PendingInstructions {
		cur = linkInstr(cur, instr)
	}
	return cur, mode
}

func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode {
	if rn.RegType() != regalloc.RegTypeInt {
		panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64))
	}
	var amode addressMode
	if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) {
		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
	} else if offsetFitsInAddressModeKindRegSignedImm9(offset) {
		amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
	} else {
		var indexReg regalloc.VReg
		if allowTmpRegUse {
			m.lowerConstantI64(tmpRegVReg, offset)
			indexReg = tmpRegVReg
		} else {
			indexReg = m.compiler.AllocateVReg(ssa.TypeI64)
			m.lowerConstantI64(indexReg, offset)
		}
		amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}
	}
	return amode
}

func (m *machine) lowerCall(si *ssa.Instruction) {
	isDirectCall := si.Opcode() == ssa.OpcodeCall
	var indirectCalleePtr ssa.Value
	var directCallee ssa.FuncRef
	var sigID ssa.SignatureID
	var args []ssa.Value
	if isDirectCall {
		directCallee, sigID, args = si.CallData()
	} else {
		indirectCalleePtr, sigID, args, _ /* on arm64, the calling convention is compatible with the Go runtime */ = si.CallIndirectData()
	}
	calleeABI := m.compiler.GetFunctionABI(m.compiler.SSABuilder().ResolveSignature(sigID))

	stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
	if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
		m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // return address frame.
	}

	for i, arg := range args {
		reg := m.compiler.VRegOf(arg)
		def := m.compiler.ValueDefinition(arg)
		m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
	}

	if isDirectCall {
		call := m.allocateInstr()
		call.asCall(directCallee, calleeABI)
		m.insert(call)
	} else {
		ptr := m.compiler.VRegOf(indirectCalleePtr)
		callInd := m.allocateInstr()
		callInd.asCallIndirect(ptr, calleeABI)
		m.insert(callInd)
	}

	var index int
	r1, rs := si.Returns()
	if r1.Valid() {
		m.callerGenFunctionReturnVReg(calleeABI, 0, m.compiler.VRegOf(r1), stackSlotSize)
		index++
	}

	for _, r := range rs {
		m.callerGenFunctionReturnVReg(calleeABI, index, m.compiler.VRegOf(r), stackSlotSize)
		index++
	}
}

func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) {
	if imm12Operand, ok := asImm12Operand(uint64(diff)); ok {
		alu := m.allocateInstr()
		var ao aluOp
		if add {
			ao = aluOpAdd
		} else {
			ao = aluOpSub
		}
		alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true)
		m.insert(alu)
	} else {
		m.lowerConstantI64(tmpRegVReg, diff)
		alu := m.allocateInstr()
		var ao aluOp
		if add {
			ao = aluOpAdd
		} else {
			ao = aluOpSub
		}
		alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true)
		m.insert(alu)
	}
}
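With eight integer and eight float parameter registers, the ABI assigns the first eight values of each class to registers and the rest to stack slots. A standalone, hypothetical sketch of that register-first splitting rule (plain Go, not the wazero internals):

package main

import "fmt"

type argLoc struct {
	kind   string // "reg" or "stack"
	reg    int    // register index within its class when kind == "reg"
	offset int64  // stack offset when kind == "stack"
}

// assignLocs mimics the register-first assignment: isFloat[i] says whether
// parameter i belongs to the float/vector class; sizes[i] is its slot size in bytes.
func assignLocs(isFloat []bool, sizes []int64, numRegs int) []argLoc {
	locs := make([]argLoc, len(isFloat))
	var nextInt, nextFloat int
	var stackOffset int64
	for i, f := range isFloat {
		next := &nextInt
		if f {
			next = &nextFloat
		}
		if *next < numRegs {
			locs[i] = argLoc{kind: "reg", reg: *next}
			*next = *next + 1
		} else {
			locs[i] = argLoc{kind: "stack", offset: stackOffset}
			stackOffset += sizes[i]
		}
	}
	return locs
}

func main() {
	// Ten integer params: the first eight land in x0..x7, the last two spill to the stack.
	isFloat := make([]bool, 10)
	sizes := []int64{8, 8, 8, 8, 8, 8, 8, 8, 8, 8}
	for i, l := range assignLocs(isFloat, sizes, 8) {
		fmt.Printf("param %d -> %+v\n", i, l)
	}
}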
9 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.go generated vendored Normal file
@@ -0,0 +1,9 @@
package arm64

// entrypoint enters the machine code generated by this backend, which begins with the preamble generated by functionABI.EmitGoEntryPreamble below.
// This implements wazevo.entrypoint; see the comments there for details.
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr)

// afterGoFunctionCallEntrypoint enters the machine code after growing the stack.
// This implements wazevo.afterGoFunctionCallEntrypoint; see the comments there for details.
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)
29 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.s generated vendored Normal file
@@ -0,0 +1,29 @@
//go:build arm64

#include "funcdata.h"
#include "textflag.h"

// See the comments on EmitGoEntryPreamble for what this function is supposed to do.
TEXT ·entrypoint(SB), NOSPLIT|NOFRAME, $0-48
	MOVD preambleExecutable+0(FP), R27
	MOVD functionExecutable+8(FP), R24
	MOVD executionContextPtr+16(FP), R0
	MOVD moduleContextPtr+24(FP), R1
	MOVD paramResultSlicePtr+32(FP), R19
	MOVD goAllocatedStackSlicePtr+40(FP), R26
	JMP (R27)

TEXT ·afterGoFunctionCallEntrypoint(SB), NOSPLIT|NOFRAME, $0-32
	MOVD goCallReturnAddress+0(FP), R20
	MOVD executionContextPtr+8(FP), R0
	MOVD stackPointer+16(FP), R19

	// Save the current FP(R29), SP and LR(R30) into the wazevo.executionContext (stored in R0).
	MOVD R29, 16(R0) // Store FP(R29) into [R0, #ExecutionContextOffsets.OriginalFramePointer]
	MOVD RSP, R27    // Move SP to R27 (temporary register) since SP cannot be stored directly in str instructions.
	MOVD R27, 24(R0) // Store R27 into [R0, #ExecutionContextOffsets.OriginalStackPointer]
	MOVD R30, 32(R0) // Store R30 into [R0, #ExecutionContextOffsets.GoReturnAddress]

	// Load the new stack pointer (which sits somewhere in the Go-allocated stack) into SP.
	MOVD R19, RSP
	JMP (R20)
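entrypoint's $0-48 frame means 48 bytes of arguments and no locals, so the FP offsets above map one-to-one onto the six pointer-sized parameters declared in abi_entry_arm64.go. A hypothetical constant block spelling out that assumption:

package main

import "fmt"

// Hypothetical offsets (bytes from FP) of entrypoint's six pointer-sized
// arguments on arm64; 6 * 8 bytes gives the $0-48 argument frame.
const (
	offPreambleExecutable       = 0
	offFunctionExecutable       = 8
	offExecutionContextPtr      = 16
	offModuleContextPtr         = 24
	offParamResultPtr           = 32
	offGoAllocatedStackSlicePtr = 40
)

func main() {
	fmt.Println("argument frame size:", offGoAllocatedStackSlicePtr+8) // 48
}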
230 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go generated vendored Normal file
@ -0,0 +1,230 @@
|
||||
package arm64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
// CompileEntryPreamble implements backend.Machine. This assumes `entrypoint` function (in abi_go_entry_arm64.s) passes:
|
||||
//
|
||||
// 1. First (execution context ptr) and Second arguments are already passed in x0, and x1.
|
||||
// 2. param/result slice ptr in x19; the pointer to []uint64{} which is used to pass arguments and accept return values.
|
||||
// 3. Go-allocated stack slice ptr in x26.
|
||||
// 4. Function executable in x24.
|
||||
//
|
||||
// also SP and FP are correct Go-runtime-based values, and LR is the return address to the Go-side caller.
|
||||
func (m *machine) CompileEntryPreamble(signature *ssa.Signature) []byte {
|
||||
root := m.constructEntryPreamble(signature)
|
||||
m.encode(root)
|
||||
return m.compiler.Buf()
|
||||
}
|
||||
|
||||
var (
|
||||
executionContextPtrReg = x0VReg
|
||||
// callee-saved regs so that they can be used in the prologue and epilogue.
|
||||
paramResultSlicePtr = x19VReg
|
||||
savedExecutionContextPtr = x20VReg
|
||||
// goAllocatedStackPtr is not used in the epilogue.
|
||||
goAllocatedStackPtr = x26VReg
|
||||
// paramResultSliceCopied is not used in the epilogue.
|
||||
paramResultSliceCopied = x25VReg
|
||||
// tmpRegVReg is not used in the epilogue.
|
||||
functionExecutable = x24VReg
|
||||
)
|
||||
|
||||
func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regalloc.VReg, arg *backend.ABIArg, argStartOffsetFromSP int64) *instruction {
|
||||
typ := arg.Type
|
||||
bits := typ.Bits()
|
||||
isStackArg := arg.Kind == backend.ABIArgKindStack
|
||||
|
||||
var loadTargetReg operand
|
||||
if !isStackArg {
|
||||
loadTargetReg = operandNR(arg.Reg)
|
||||
} else {
|
||||
switch typ {
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
loadTargetReg = operandNR(x15VReg)
|
||||
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
|
||||
loadTargetReg = operandNR(v15VReg)
|
||||
default:
|
||||
panic("TODO?")
|
||||
}
|
||||
}
|
||||
|
||||
var postIndexImm int64
|
||||
if typ == ssa.TypeV128 {
|
||||
postIndexImm = 16 // v128 is represented as 2x64-bit in Go slice.
|
||||
} else {
|
||||
postIndexImm = 8
|
||||
}
|
||||
loadMode := addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm}
|
||||
|
||||
instr := m.allocateInstr()
|
||||
switch typ {
|
||||
case ssa.TypeI32:
|
||||
instr.asULoad(loadTargetReg, loadMode, 32)
|
||||
case ssa.TypeI64:
|
||||
instr.asULoad(loadTargetReg, loadMode, 64)
|
||||
case ssa.TypeF32:
|
||||
instr.asFpuLoad(loadTargetReg, loadMode, 32)
|
||||
case ssa.TypeF64:
|
||||
instr.asFpuLoad(loadTargetReg, loadMode, 64)
|
||||
case ssa.TypeV128:
|
||||
instr.asFpuLoad(loadTargetReg, loadMode, 128)
|
||||
}
|
||||
cur = linkInstr(cur, instr)
|
||||
|
||||
if isStackArg {
|
||||
var storeMode addressMode
|
||||
cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true)
|
||||
toStack := m.allocateInstr()
|
||||
toStack.asStore(loadTargetReg, storeMode, bits)
|
||||
cur = linkInstr(cur, toStack)
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr regalloc.VReg, result *backend.ABIArg, resultStartOffsetFromSP int64) *instruction {
|
||||
isStackArg := result.Kind == backend.ABIArgKindStack
|
||||
typ := result.Type
|
||||
bits := typ.Bits()
|
||||
|
||||
var storeTargetReg operand
|
||||
if !isStackArg {
|
||||
storeTargetReg = operandNR(result.Reg)
|
||||
} else {
|
||||
switch typ {
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
storeTargetReg = operandNR(x15VReg)
|
||||
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
|
||||
storeTargetReg = operandNR(v15VReg)
|
||||
default:
|
||||
panic("TODO?")
|
||||
}
|
||||
}
|
||||
|
||||
var postIndexImm int64
|
||||
if typ == ssa.TypeV128 {
|
||||
postIndexImm = 16 // v128 is represented as 2x64-bit in Go slice.
|
||||
} else {
|
||||
postIndexImm = 8
|
||||
}
|
||||
|
||||
if isStackArg {
|
||||
var loadMode addressMode
|
||||
cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true)
|
||||
toReg := m.allocateInstr()
|
||||
switch typ {
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
toReg.asULoad(storeTargetReg, loadMode, bits)
|
||||
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
|
||||
toReg.asFpuLoad(storeTargetReg, loadMode, bits)
|
||||
default:
|
||||
panic("TODO?")
|
||||
}
|
||||
cur = linkInstr(cur, toReg)
|
||||
}
|
||||
|
||||
mode := addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm}
|
||||
instr := m.allocateInstr()
|
||||
instr.asStore(storeTargetReg, mode, bits)
|
||||
cur = linkInstr(cur, instr)
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) constructEntryPreamble(sig *ssa.Signature) (root *instruction) {
|
||||
abi := backend.FunctionABI{}
|
||||
abi.Init(sig, intParamResultRegs, floatParamResultRegs)
|
||||
|
||||
root = m.allocateNop()
|
||||
|
||||
//// ----------------------------------- prologue ----------------------------------- ////
|
||||
|
||||
// First, we save executionContextPtrReg into a callee-saved register so that it can be used in epilogue as well.
|
||||
// mov savedExecutionContextPtr, x0
|
||||
cur := m.move64(savedExecutionContextPtr, executionContextPtrReg, root)
|
||||
|
||||
// Next, save the current FP, SP and LR into the wazevo.executionContext:
|
||||
// str fp, [savedExecutionContextPtr, #OriginalFramePointer]
|
||||
// mov tmp, sp ;; sp cannot be str'ed directly.
|
||||
// str sp, [savedExecutionContextPtr, #OriginalStackPointer]
|
||||
// str lr, [savedExecutionContextPtr, #GoReturnAddress]
|
||||
cur = m.loadOrStoreAtExecutionContext(fpVReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, true, cur)
|
||||
cur = m.move64(tmpRegVReg, spVReg, cur)
|
||||
cur = m.loadOrStoreAtExecutionContext(tmpRegVReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, true, cur)
|
||||
cur = m.loadOrStoreAtExecutionContext(lrVReg, wazevoapi.ExecutionContextOffsetGoReturnAddress, true, cur)
|
||||
|
||||
// Then, move the Go-allocated stack pointer to SP:
|
||||
// mov sp, goAllocatedStackPtr
|
||||
cur = m.move64(spVReg, goAllocatedStackPtr, cur)
|
||||
|
||||
prReg := paramResultSlicePtr
|
||||
if len(abi.Args) > 2 && len(abi.Rets) > 0 {
|
||||
// paramResultSlicePtr is modified during the execution of goEntryPreamblePassArg,
|
||||
// so copy it to another reg.
|
||||
cur = m.move64(paramResultSliceCopied, paramResultSlicePtr, cur)
|
||||
prReg = paramResultSliceCopied
|
||||
}
|
||||
|
||||
stackSlotSize := int64(abi.AlignedArgResultStackSlotSize())
|
||||
for i := range abi.Args {
|
||||
if i < 2 {
|
||||
// module context ptr and execution context ptr are passed in x0 and x1 by the Go assembly function.
|
||||
continue
|
||||
}
|
||||
arg := &abi.Args[i]
|
||||
cur = m.goEntryPreamblePassArg(cur, prReg, arg, -stackSlotSize)
|
||||
}
|
||||
|
||||
// Call the real function.
|
||||
bl := m.allocateInstr()
|
||||
bl.asCallIndirect(functionExecutable, &abi)
|
||||
cur = linkInstr(cur, bl)
|
||||
|
||||
///// ----------------------------------- epilogue ----------------------------------- /////
|
||||
|
||||
// Store the register results into paramResultSlicePtr.
|
||||
for i := range abi.Rets {
|
||||
cur = m.goEntryPreamblePassResult(cur, paramResultSlicePtr, &abi.Rets[i], abi.ArgStackSize-stackSlotSize)
|
||||
}
|
||||
|
||||
// Finally, restore the FP, SP and LR, and return to the Go code.
|
||||
// ldr fp, [savedExecutionContextPtr, #OriginalFramePointer]
|
||||
// ldr tmp, [savedExecutionContextPtr, #OriginalStackPointer]
|
||||
// mov sp, tmp ;; sp cannot be str'ed directly.
|
||||
// ldr lr, [savedExecutionContextPtr, #GoReturnAddress]
|
||||
// ret ;; --> return to the Go code
|
||||
cur = m.loadOrStoreAtExecutionContext(fpVReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, false, cur)
|
||||
cur = m.loadOrStoreAtExecutionContext(tmpRegVReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, false, cur)
|
||||
cur = m.move64(spVReg, tmpRegVReg, cur)
|
||||
cur = m.loadOrStoreAtExecutionContext(lrVReg, wazevoapi.ExecutionContextOffsetGoReturnAddress, false, cur)
|
||||
retInst := m.allocateInstr()
|
||||
retInst.asRet()
|
||||
linkInstr(cur, retInst)
|
||||
return
|
||||
}
|
||||
|
||||
func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction {
|
||||
instr := m.allocateInstr()
|
||||
instr.asMove64(dst, src)
|
||||
return linkInstr(prev, instr)
|
||||
}
|
||||
|
||||
func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction {
|
||||
instr := m.allocateInstr()
|
||||
mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()}
|
||||
if store {
|
||||
instr.asStore(operandNR(d), mode, 64)
|
||||
} else {
|
||||
instr.asULoad(operandNR(d), mode, 64)
|
||||
}
|
||||
return linkInstr(prev, instr)
|
||||
}
|
||||
|
||||
func linkInstr(prev, next *instruction) *instruction {
|
||||
prev.next = next
|
||||
next.prev = prev
|
||||
return next
|
||||
}
|
428 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go generated vendored Normal file
@ -0,0 +1,428 @@
|
||||
package arm64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
var calleeSavedRegistersSorted = []regalloc.VReg{
|
||||
x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg,
|
||||
v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg,
|
||||
}
|
||||
|
||||
// CompileGoFunctionTrampoline implements backend.Machine.
|
||||
func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte {
|
||||
exct := m.executableContext
|
||||
argBegin := 1 // Skips exec context by default.
|
||||
if needModuleContextPtr {
|
||||
argBegin++
|
||||
}
|
||||
|
||||
abi := &backend.FunctionABI{}
|
||||
abi.Init(sig, intParamResultRegs, floatParamResultRegs)
|
||||
m.currentABI = abi
|
||||
|
||||
cur := m.allocateInstr()
|
||||
cur.asNop0()
|
||||
exct.RootInstr = cur
|
||||
|
||||
// Execution context is always the first argument.
|
||||
execCtrPtr := x0VReg
|
||||
|
||||
// In the following, we create the following stack layout:
|
||||
//
|
||||
// (high address)
|
||||
// SP ------> +-----------------+ <----+
|
||||
// | ....... | |
|
||||
// | ret Y | |
|
||||
// | ....... | |
|
||||
// | ret 0 | |
|
||||
// | arg X | | size_of_arg_ret
|
||||
// | ....... | |
|
||||
// | arg 1 | |
|
||||
// | arg 0 | <----+ <-------- originalArg0Reg
|
||||
// | size_of_arg_ret |
|
||||
// | ReturnAddress |
|
||||
// +-----------------+ <----+
|
||||
// | xxxx | | ;; might be padded to make it 16-byte aligned.
|
||||
// +--->| arg[N]/ret[M] | |
|
||||
// sliceSize| | ............ | | goCallStackSize
|
||||
// | | arg[1]/ret[1] | |
|
||||
// +--->| arg[0]/ret[0] | <----+ <-------- arg0ret0AddrReg
|
||||
// | sliceSize |
|
||||
// | frame_size |
|
||||
// +-----------------+
|
||||
// (low address)
|
||||
//
|
||||
// where the region of "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go functions,
|
||||
// therefore will be accessed as the usual []uint64. So that's where we need to pass/receive
|
||||
// the arguments/return values.
|
||||
|
||||
// First of all, to update the SP, and create "ReturnAddress + size_of_arg_ret".
|
||||
cur = m.createReturnAddrAndSizeOfArgRetSlot(cur)
|
||||
|
||||
const frameInfoSize = 16 // == frame_size + sliceSize.
|
||||
|
||||
// Next, we should allocate the stack for the Go function call if necessary.
|
||||
goCallStackSize, sliceSizeInBytes := backend.GoFunctionCallRequiredStackSize(sig, argBegin)
|
||||
cur = m.insertStackBoundsCheck(goCallStackSize+frameInfoSize, cur)
|
||||
|
||||
originalArg0Reg := x17VReg // Caller save, so we can use it for whatever we want.
|
||||
if m.currentABI.AlignedArgResultStackSlotSize() > 0 {
|
||||
// At this point, SP points to `ReturnAddress`, so add 16 to get the original arg 0 slot.
|
||||
cur = m.addsAddOrSubStackPointer(cur, originalArg0Reg, frameInfoSize, true)
|
||||
}
|
||||
|
||||
// Save the callee saved registers.
|
||||
cur = m.saveRegistersInExecutionContext(cur, calleeSavedRegistersSorted)
|
||||
|
||||
if needModuleContextPtr {
|
||||
offset := wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.I64()
|
||||
if !offsetFitsInAddressModeKindRegUnsignedImm12(64, offset) {
|
||||
panic("BUG: too large or un-aligned offset for goFunctionCallCalleeModuleContextOpaque in execution context")
|
||||
}
|
||||
|
||||
// Module context is always the second argument.
|
||||
moduleCtrPtr := x1VReg
|
||||
store := m.allocateInstr()
|
||||
amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset}
|
||||
store.asStore(operandNR(moduleCtrPtr), amode, 64)
|
||||
cur = linkInstr(cur, store)
|
||||
}
|
||||
|
||||
// Advances the stack pointer.
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, goCallStackSize, false)
|
||||
|
||||
// Copy the pointer to x15VReg.
|
||||
arg0ret0AddrReg := x15VReg // Caller save, so we can use it for whatever we want.
|
||||
copySp := m.allocateInstr()
|
||||
copySp.asMove64(arg0ret0AddrReg, spVReg)
|
||||
cur = linkInstr(cur, copySp)
|
||||
|
||||
// Next, we need to store all the arguments to the stack in the typical Wasm stack style.
|
||||
for i := range abi.Args[argBegin:] {
|
||||
arg := &abi.Args[argBegin+i]
|
||||
store := m.allocateInstr()
|
||||
var v regalloc.VReg
|
||||
if arg.Kind == backend.ABIArgKindReg {
|
||||
v = arg.Reg
|
||||
} else {
|
||||
cur, v = m.goFunctionCallLoadStackArg(cur, originalArg0Reg, arg,
|
||||
// Caller save, so we can use it for whatever we want.
|
||||
x11VReg, v11VReg)
|
||||
}
|
||||
|
||||
var sizeInBits byte
|
||||
if arg.Type == ssa.TypeV128 {
|
||||
sizeInBits = 128
|
||||
} else {
|
||||
sizeInBits = 64
|
||||
}
|
||||
store.asStore(operandNR(v),
|
||||
addressMode{
|
||||
kind: addressModeKindPostIndex,
|
||||
rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8),
|
||||
}, sizeInBits)
|
||||
cur = linkInstr(cur, store)
|
||||
}
|
||||
|
||||
// Finally, now that we've advanced SP to arg[0]/ret[0], we allocate `frame_size + sliceSize`.
|
||||
var frameSizeReg, sliceSizeReg regalloc.VReg
|
||||
if goCallStackSize > 0 {
|
||||
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, goCallStackSize)
|
||||
frameSizeReg = tmpRegVReg
|
||||
cur = m.lowerConstantI64AndInsert(cur, x16VReg, sliceSizeInBytes/8)
|
||||
sliceSizeReg = x16VReg
|
||||
} else {
|
||||
frameSizeReg = xzrVReg
|
||||
sliceSizeReg = xzrVReg
|
||||
}
|
||||
_amode := addressModePreOrPostIndex(spVReg, -16, true)
|
||||
storeP := m.allocateInstr()
|
||||
storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode)
|
||||
cur = linkInstr(cur, storeP)
|
||||
|
||||
// Set the exit status on the execution context.
|
||||
cur = m.setExitCode(cur, x0VReg, exitCode)
|
||||
|
||||
// Save the current stack pointer.
|
||||
cur = m.saveCurrentStackPointer(cur, x0VReg)
|
||||
|
||||
// Exit the execution.
|
||||
cur = m.storeReturnAddressAndExit(cur)
|
||||
|
||||
// After the call, we need to restore the callee saved registers.
|
||||
cur = m.restoreRegistersInExecutionContext(cur, calleeSavedRegistersSorted)
|
||||
|
||||
// Get the pointer to the arg[0]/ret[0]: We need to skip `frame_size + sliceSize`.
|
||||
if len(abi.Rets) > 0 {
|
||||
cur = m.addsAddOrSubStackPointer(cur, arg0ret0AddrReg, frameInfoSize, true)
|
||||
}
|
||||
|
||||
// Advances the SP so that it points to `ReturnAddress`.
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true)
|
||||
ldr := m.allocateInstr()
|
||||
// And load the return address.
|
||||
ldr.asULoad(operandNR(lrVReg),
|
||||
addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
|
||||
cur = linkInstr(cur, ldr)
|
||||
|
||||
originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want.
|
||||
if m.currentABI.RetStackSize > 0 {
|
||||
cur = m.addsAddOrSubStackPointer(cur, originalRet0Reg, m.currentABI.ArgStackSize, true)
|
||||
}
|
||||
|
||||
// Make the SP point to the original address (above the result slot).
|
||||
if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 {
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
|
||||
}
|
||||
|
||||
for i := range abi.Rets {
|
||||
r := &abi.Rets[i]
|
||||
if r.Kind == backend.ABIArgKindReg {
|
||||
loadIntoReg := m.allocateInstr()
|
||||
mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
|
||||
switch r.Type {
|
||||
case ssa.TypeI32:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoReg.asULoad(operandNR(r.Reg), mode, 32)
|
||||
case ssa.TypeI64:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoReg.asULoad(operandNR(r.Reg), mode, 64)
|
||||
case ssa.TypeF32:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32)
|
||||
case ssa.TypeF64:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64)
|
||||
case ssa.TypeV128:
|
||||
mode.imm = 16
|
||||
loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128)
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
cur = linkInstr(cur, loadIntoReg)
|
||||
} else {
|
||||
// First we need to load the value to a temporary just like ^^.
|
||||
intTmp, floatTmp := x11VReg, v11VReg
|
||||
loadIntoTmpReg := m.allocateInstr()
|
||||
mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
|
||||
var resultReg regalloc.VReg
|
||||
switch r.Type {
|
||||
case ssa.TypeI32:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32)
|
||||
resultReg = intTmp
|
||||
case ssa.TypeI64:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64)
|
||||
resultReg = intTmp
|
||||
case ssa.TypeF32:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32)
|
||||
resultReg = floatTmp
|
||||
case ssa.TypeF64:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64)
|
||||
resultReg = floatTmp
|
||||
case ssa.TypeV128:
|
||||
mode.imm = 16
|
||||
loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128)
|
||||
resultReg = floatTmp
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
cur = linkInstr(cur, loadIntoTmpReg)
|
||||
cur = m.goFunctionCallStoreStackResult(cur, originalRet0Reg, r, resultReg)
|
||||
}
|
||||
}
|
||||
|
||||
ret := m.allocateInstr()
|
||||
ret.asRet()
|
||||
linkInstr(cur, ret)
|
||||
|
||||
m.encode(m.executableContext.RootInstr)
|
||||
return m.compiler.Buf()
|
||||
}
|
||||
|
||||
func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction {
|
||||
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
|
||||
for _, v := range regs {
|
||||
store := m.allocateInstr()
|
||||
var sizeInBits byte
|
||||
switch v.RegType() {
|
||||
case regalloc.RegTypeInt:
|
||||
sizeInBits = 64
|
||||
case regalloc.RegTypeFloat:
|
||||
sizeInBits = 128
|
||||
}
|
||||
store.asStore(operandNR(v),
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
// Execution context is always the first argument.
|
||||
rn: x0VReg, imm: offset,
|
||||
}, sizeInBits)
|
||||
store.prev = cur
|
||||
cur.next = store
|
||||
cur = store
|
||||
offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally store regs at the offset of multiple of 16.
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction {
|
||||
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
|
||||
for _, v := range regs {
|
||||
load := m.allocateInstr()
|
||||
var as func(dst operand, amode addressMode, sizeInBits byte)
|
||||
var sizeInBits byte
|
||||
switch v.RegType() {
|
||||
case regalloc.RegTypeInt:
|
||||
as = load.asULoad
|
||||
sizeInBits = 64
|
||||
case regalloc.RegTypeFloat:
|
||||
as = load.asFpuLoad
|
||||
sizeInBits = 128
|
||||
}
|
||||
as(operandNR(v),
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
// Execution context is always the first argument.
|
||||
rn: x0VReg, imm: offset,
|
||||
}, sizeInBits)
|
||||
cur = linkInstr(cur, load)
|
||||
offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally load regs at the offset of multiple of 16.
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) lowerConstantI64AndInsert(cur *instruction, dst regalloc.VReg, v int64) *instruction {
|
||||
exct := m.executableContext
|
||||
exct.PendingInstructions = exct.PendingInstructions[:0]
|
||||
m.lowerConstantI64(dst, v)
|
||||
for _, instr := range exct.PendingInstructions {
|
||||
cur = linkInstr(cur, instr)
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) lowerConstantI32AndInsert(cur *instruction, dst regalloc.VReg, v int32) *instruction {
|
||||
exct := m.executableContext
|
||||
exct.PendingInstructions = exct.PendingInstructions[:0]
|
||||
m.lowerConstantI32(dst, v)
|
||||
for _, instr := range exct.PendingInstructions {
|
||||
cur = linkInstr(cur, instr)
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode wazevoapi.ExitCode) *instruction {
|
||||
constReg := x17VReg // caller-saved, so we can use it.
|
||||
cur = m.lowerConstantI32AndInsert(cur, constReg, int32(exitCode))
|
||||
|
||||
// Set the exit status on the execution context.
|
||||
setExistStatus := m.allocateInstr()
|
||||
setExistStatus.asStore(operandNR(constReg),
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
|
||||
}, 32)
|
||||
cur = linkInstr(cur, setExistStatus)
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction {
|
||||
// Read the return address into tmp, and store it in the execution context.
|
||||
adr := m.allocateInstr()
|
||||
adr.asAdr(tmpRegVReg, exitSequenceSize+8)
|
||||
cur = linkInstr(cur, adr)
|
||||
|
||||
storeReturnAddr := m.allocateInstr()
|
||||
storeReturnAddr.asStore(operandNR(tmpRegVReg),
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
// Execution context is always the first argument.
|
||||
rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
|
||||
}, 64)
|
||||
cur = linkInstr(cur, storeReturnAddr)
|
||||
|
||||
// Exit the execution.
|
||||
trapSeq := m.allocateInstr()
|
||||
trapSeq.asExitSequence(x0VReg)
|
||||
cur = linkInstr(cur, trapSeq)
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VReg) *instruction {
|
||||
// Save the current stack pointer:
|
||||
// mov tmp, sp,
|
||||
// str tmp, [exec_ctx, #stackPointerBeforeGoCall]
|
||||
movSp := m.allocateInstr()
|
||||
movSp.asMove64(tmpRegVReg, spVReg)
|
||||
cur = linkInstr(cur, movSp)
|
||||
|
||||
strSp := m.allocateInstr()
|
||||
strSp.asStore(operandNR(tmpRegVReg),
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
|
||||
}, 64)
|
||||
cur = linkInstr(cur, strSp)
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) {
|
||||
load := m.allocateInstr()
|
||||
var result regalloc.VReg
|
||||
mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg}
|
||||
switch arg.Type {
|
||||
case ssa.TypeI32:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
load.asULoad(operandNR(intVReg), mode, 32)
|
||||
result = intVReg
|
||||
case ssa.TypeI64:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
load.asULoad(operandNR(intVReg), mode, 64)
|
||||
result = intVReg
|
||||
case ssa.TypeF32:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
load.asFpuLoad(operandNR(floatVReg), mode, 32)
|
||||
result = floatVReg
|
||||
case ssa.TypeF64:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
load.asFpuLoad(operandNR(floatVReg), mode, 64)
|
||||
result = floatVReg
|
||||
case ssa.TypeV128:
|
||||
mode.imm = 16
|
||||
load.asFpuLoad(operandNR(floatVReg), mode, 128)
|
||||
result = floatVReg
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
|
||||
cur = linkInstr(cur, load)
|
||||
return cur, result
|
||||
}
|
||||
|
||||
func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction {
|
||||
store := m.allocateInstr()
|
||||
mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg}
|
||||
var sizeInBits byte
|
||||
switch result.Type {
|
||||
case ssa.TypeI32, ssa.TypeF32:
|
||||
mode.imm = 8
|
||||
sizeInBits = 32
|
||||
case ssa.TypeI64, ssa.TypeF64:
|
||||
mode.imm = 8
|
||||
sizeInBits = 64
|
||||
case ssa.TypeV128:
|
||||
mode.imm = 16
|
||||
sizeInBits = 128
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
store.asStore(operandNR(resultVReg), mode, sizeInBits)
|
||||
return linkInstr(cur, store)
|
||||
}
|
215 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/cond.go generated vendored Normal file
@ -0,0 +1,215 @@
|
||||
package arm64
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
type (
|
||||
cond uint64
|
||||
condKind byte
|
||||
)
|
||||
|
||||
const (
|
||||
// condKindRegisterZero represents a condition which checks if the register is zero.
|
||||
// This indicates that the instruction must be encoded as CBZ:
|
||||
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/CBZ--Compare-and-Branch-on-Zero-
|
||||
condKindRegisterZero condKind = iota
|
||||
// condKindRegisterNotZero indicates that the instruction must be encoded as CBNZ:
|
||||
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/CBNZ--Compare-and-Branch-on-Nonzero-
|
||||
condKindRegisterNotZero
|
||||
// condKindCondFlagSet indicates that the instruction must be encoded as B.cond:
|
||||
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally-
|
||||
condKindCondFlagSet
|
||||
)
|
||||
|
||||
// kind returns the kind of condition which is stored in the first two bits.
|
||||
func (c cond) kind() condKind {
|
||||
return condKind(c & 0b11)
|
||||
}
|
||||
|
||||
func (c cond) asUint64() uint64 {
|
||||
return uint64(c)
|
||||
}
|
||||
|
||||
// register returns the register for register conditions.
|
||||
// This panics if the condition is not a register condition (condKindRegisterZero or condKindRegisterNotZero).
|
||||
func (c cond) register() regalloc.VReg {
|
||||
if c.kind() != condKindRegisterZero && c.kind() != condKindRegisterNotZero {
|
||||
panic("condition is not a register")
|
||||
}
|
||||
return regalloc.VReg(c >> 2)
|
||||
}
|
||||
|
||||
func registerAsRegZeroCond(r regalloc.VReg) cond {
|
||||
return cond(r)<<2 | cond(condKindRegisterZero)
|
||||
}
|
||||
|
||||
func registerAsRegNotZeroCond(r regalloc.VReg) cond {
|
||||
return cond(r)<<2 | cond(condKindRegisterNotZero)
|
||||
}
|
||||
|
||||
func (c cond) flag() condFlag {
|
||||
if c.kind() != condKindCondFlagSet {
|
||||
panic("condition is not a flag")
|
||||
}
|
||||
return condFlag(c >> 2)
|
||||
}
|
||||
|
||||
func (c condFlag) asCond() cond {
|
||||
return cond(c)<<2 | cond(condKindCondFlagSet)
|
||||
}
|
||||
|
||||
// condFlag represents a condition flag for conditional branches.
|
||||
// The value matches the encoding of condition flags in the ARM64 instruction set.
|
||||
// https://developer.arm.com/documentation/den0024/a/The-A64-instruction-set/Data-processing-instructions/Conditional-instructions
|
||||
type condFlag uint8
|
||||
|
||||
const (
|
||||
eq condFlag = iota // eq represents "equal"
|
||||
ne // ne represents "not equal"
|
||||
hs // hs represents "higher or same"
|
||||
lo // lo represents "lower"
|
||||
mi // mi represents "minus or negative result"
|
||||
pl // pl represents "plus or positive result"
|
||||
vs // vs represents "overflow set"
|
||||
vc // vc represents "overflow clear"
|
||||
hi // hi represents "higher"
|
||||
ls // ls represents "lower or same"
|
||||
ge // ge represents "greater or equal"
|
||||
lt // lt represents "less than"
|
||||
gt // gt represents "greater than"
|
||||
le // le represents "less than or equal"
|
||||
al // al represents "always"
|
||||
nv // nv represents "never"
|
||||
)
|
||||
|
||||
// invert returns the inverted condition.
|
||||
func (c condFlag) invert() condFlag {
|
||||
switch c {
|
||||
case eq:
|
||||
return ne
|
||||
case ne:
|
||||
return eq
|
||||
case hs:
|
||||
return lo
|
||||
case lo:
|
||||
return hs
|
||||
case mi:
|
||||
return pl
|
||||
case pl:
|
||||
return mi
|
||||
case vs:
|
||||
return vc
|
||||
case vc:
|
||||
return vs
|
||||
case hi:
|
||||
return ls
|
||||
case ls:
|
||||
return hi
|
||||
case ge:
|
||||
return lt
|
||||
case lt:
|
||||
return ge
|
||||
case gt:
|
||||
return le
|
||||
case le:
|
||||
return gt
|
||||
case al:
|
||||
return nv
|
||||
case nv:
|
||||
return al
|
||||
default:
|
||||
panic(c)
|
||||
}
|
||||
}
|
||||
|
||||
// String implements fmt.Stringer.
|
||||
func (c condFlag) String() string {
|
||||
switch c {
|
||||
case eq:
|
||||
return "eq"
|
||||
case ne:
|
||||
return "ne"
|
||||
case hs:
|
||||
return "hs"
|
||||
case lo:
|
||||
return "lo"
|
||||
case mi:
|
||||
return "mi"
|
||||
case pl:
|
||||
return "pl"
|
||||
case vs:
|
||||
return "vs"
|
||||
case vc:
|
||||
return "vc"
|
||||
case hi:
|
||||
return "hi"
|
||||
case ls:
|
||||
return "ls"
|
||||
case ge:
|
||||
return "ge"
|
||||
case lt:
|
||||
return "lt"
|
||||
case gt:
|
||||
return "gt"
|
||||
case le:
|
||||
return "le"
|
||||
case al:
|
||||
return "al"
|
||||
case nv:
|
||||
return "nv"
|
||||
default:
|
||||
panic(strconv.Itoa(int(c)))
|
||||
}
|
||||
}
|
||||
|
||||
// condFlagFromSSAIntegerCmpCond returns the condition flag for the given ssa.IntegerCmpCond.
|
||||
func condFlagFromSSAIntegerCmpCond(c ssa.IntegerCmpCond) condFlag {
|
||||
switch c {
|
||||
case ssa.IntegerCmpCondEqual:
|
||||
return eq
|
||||
case ssa.IntegerCmpCondNotEqual:
|
||||
return ne
|
||||
case ssa.IntegerCmpCondSignedLessThan:
|
||||
return lt
|
||||
case ssa.IntegerCmpCondSignedGreaterThanOrEqual:
|
||||
return ge
|
||||
case ssa.IntegerCmpCondSignedGreaterThan:
|
||||
return gt
|
||||
case ssa.IntegerCmpCondSignedLessThanOrEqual:
|
||||
return le
|
||||
case ssa.IntegerCmpCondUnsignedLessThan:
|
||||
return lo
|
||||
case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual:
|
||||
return hs
|
||||
case ssa.IntegerCmpCondUnsignedGreaterThan:
|
||||
return hi
|
||||
case ssa.IntegerCmpCondUnsignedLessThanOrEqual:
|
||||
return ls
|
||||
default:
|
||||
panic(c)
|
||||
}
|
||||
}
|
||||
|
||||
// condFlagFromSSAFloatCmpCond returns the condition flag for the given ssa.FloatCmpCond.
|
||||
func condFlagFromSSAFloatCmpCond(c ssa.FloatCmpCond) condFlag {
|
||||
switch c {
|
||||
case ssa.FloatCmpCondEqual:
|
||||
return eq
|
||||
case ssa.FloatCmpCondNotEqual:
|
||||
return ne
|
||||
case ssa.FloatCmpCondLessThan:
|
||||
return mi
|
||||
case ssa.FloatCmpCondLessThanOrEqual:
|
||||
return ls
|
||||
case ssa.FloatCmpCondGreaterThan:
|
||||
return gt
|
||||
case ssa.FloatCmpCondGreaterThanOrEqual:
|
||||
return ge
|
||||
default:
|
||||
panic(c)
|
||||
}
|
||||
}
|
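The cond type above packs a 2-bit condKind into the low bits and the register or condition flag into the remaining bits, which is why register() and flag() shift right by two. A minimal standalone illustration of the same packing scheme (hypothetical, not the wazero API):

package main

import "fmt"

type cond uint64

const (
	kindRegisterZero    = 0
	kindRegisterNotZero = 1
	kindCondFlagSet     = 2
)

// pack stores the payload (a register ID or a flag value) above the 2-bit kind.
func pack(payload uint64, kind uint64) cond { return cond(payload<<2 | kind) }

func (c cond) kind() uint64    { return uint64(c) & 0b11 }
func (c cond) payload() uint64 { return uint64(c) >> 2 }

func main() {
	c := pack(7, kindRegisterNotZero) // e.g. "branch if register #7 is non-zero"
	fmt.Println(c.kind() == kindRegisterNotZero, c.payload() == 7)
}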
2545 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go generated vendored Normal file
File diff suppressed because it is too large
2351 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go generated vendored Normal file
File diff suppressed because it is too large
301 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go generated vendored Normal file
@ -0,0 +1,301 @@
|
||||
package arm64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
// lowerConstant allocates a new VReg and inserts the instruction to load the constant value.
|
||||
func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
|
||||
val := instr.Return()
|
||||
valType := val.Type()
|
||||
|
||||
vr = m.compiler.AllocateVReg(valType)
|
||||
v := instr.ConstantVal()
|
||||
m.insertLoadConstant(v, valType, vr)
|
||||
return
|
||||
}
|
||||
|
||||
// InsertLoadConstantBlockArg implements backend.Machine.
|
||||
func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) {
|
||||
val := instr.Return()
|
||||
valType := val.Type()
|
||||
v := instr.ConstantVal()
|
||||
load := m.allocateInstr()
|
||||
load.asLoadConstBlockArg(v, valType, vr)
|
||||
m.insert(load)
|
||||
}
|
||||
|
||||
func (m *machine) lowerLoadConstantBlockArgAfterRegAlloc(i *instruction) {
|
||||
v, typ, dst := i.loadConstBlockArgData()
|
||||
m.insertLoadConstant(v, typ, dst)
|
||||
}
|
||||
|
||||
func (m *machine) insertLoadConstant(v uint64, valType ssa.Type, vr regalloc.VReg) {
|
||||
if valType.Bits() < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc.
|
||||
v = v & ((1 << valType.Bits()) - 1)
|
||||
}
|
||||
|
||||
switch valType {
|
||||
case ssa.TypeF32:
|
||||
loadF := m.allocateInstr()
|
||||
loadF.asLoadFpuConst32(vr, v)
|
||||
m.insert(loadF)
|
||||
case ssa.TypeF64:
|
||||
loadF := m.allocateInstr()
|
||||
loadF.asLoadFpuConst64(vr, v)
|
||||
m.insert(loadF)
|
||||
case ssa.TypeI32:
|
||||
if v == 0 {
|
||||
m.InsertMove(vr, xzrVReg, ssa.TypeI32)
|
||||
} else {
|
||||
m.lowerConstantI32(vr, int32(v))
|
||||
}
|
||||
case ssa.TypeI64:
|
||||
if v == 0 {
|
||||
m.InsertMove(vr, xzrVReg, ssa.TypeI64)
|
||||
} else {
|
||||
m.lowerConstantI64(vr, int64(v))
|
||||
}
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
}
|
||||
|
||||
// The following logic is based on the old asm/arm64 package.
|
||||
// https://github.com/tetratelabs/wazero/blob/39f2ff23a6d609e10c82b9cc0b981f6de5b87a9c/internal/asm/arm64/impl.go
|
||||
|
||||
func (m *machine) lowerConstantI32(dst regalloc.VReg, c int32) {
|
||||
// Following the logic here:
|
||||
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637
|
||||
ic := int64(uint32(c))
|
||||
if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
|
||||
if isBitMaskImmediate(uint64(c), false) {
|
||||
m.lowerConstViaBitMaskImmediate(uint64(uint32(c)), dst, false)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if t := const16bitAligned(int64(uint32(c))); t >= 0 {
|
||||
// If the const fits in a single 16-bit aligned halfword, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
// we can load it with a single movz.
|
||||
m.insertMOVZ(dst, uint64(uint32(c)>>(16*t)), t, false)
|
||||
} else if t := const16bitAligned(int64(^c)); t >= 0 {
|
||||
// Also, if the inverse of the const can fit within 16-bit range, do the same ^^.
|
||||
m.insertMOVN(dst, uint64(^c>>(16*t)), t, false)
|
||||
} else if isBitMaskImmediate(uint64(uint32(c)), false) {
|
||||
m.lowerConstViaBitMaskImmediate(uint64(c), dst, false)
|
||||
} else {
|
||||
// Otherwise, we use MOVZ and MOVK to load it.
|
||||
c16 := uint16(c)
|
||||
m.insertMOVZ(dst, uint64(c16), 0, false)
|
||||
c16 = uint16(uint32(c) >> 16)
|
||||
m.insertMOVK(dst, uint64(c16), 1, false)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) lowerConstantI64(dst regalloc.VReg, c int64) {
|
||||
// Following the logic here:
|
||||
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852
|
||||
if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
|
||||
if isBitMaskImmediate(uint64(c), true) {
|
||||
m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if t := const16bitAligned(c); t >= 0 {
|
||||
// If the const fits in a single 16-bit aligned halfword, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
// we can load it with a single movz.
|
||||
m.insertMOVZ(dst, uint64(c)>>(16*t), t, true)
|
||||
} else if t := const16bitAligned(^c); t >= 0 {
|
||||
// Also, if the bitwise inverse of the const fits within the 16-bit range, do the same ^^.
|
||||
m.insertMOVN(dst, uint64(^c)>>(16*t), t, true)
|
||||
} else if isBitMaskImmediate(uint64(c), true) {
|
||||
m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
|
||||
} else {
|
||||
m.load64bitConst(c, dst)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) lowerConstViaBitMaskImmediate(c uint64, dst regalloc.VReg, b64 bool) {
|
||||
instr := m.allocateInstr()
|
||||
instr.asALUBitmaskImm(aluOpOrr, dst, xzrVReg, c, b64)
|
||||
m.insert(instr)
|
||||
}
|
||||
|
||||
// isBitMaskImmediate determines if the value can be encoded as "bitmask immediate".
|
||||
//
|
||||
// Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits.
|
||||
// Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits.
|
||||
//
|
||||
// See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate-
|
||||
func isBitMaskImmediate(x uint64, _64 bool) bool {
|
||||
// All zeros and ones are not "bitmask immediate" by definition.
|
||||
if x == 0 || (_64 && x == 0xffff_ffff_ffff_ffff) || (!_64 && x == 0xffff_ffff) {
|
||||
return false
|
||||
}
|
||||
|
||||
switch {
|
||||
case x != x>>32|x<<32:
|
||||
// e = 64
|
||||
case x != x>>16|x<<48:
|
||||
// e = 32 (x == x>>32|x<<32).
|
||||
// e.g. 0x00ff_ff00_00ff_ff00
|
||||
x = uint64(int32(x))
|
||||
case x != x>>8|x<<56:
|
||||
// e = 16 (x == x>>16|x<<48).
|
||||
// e.g. 0x00ff_00ff_00ff_00ff
|
||||
x = uint64(int16(x))
|
||||
case x != x>>4|x<<60:
|
||||
// e = 8 (x == x>>8|x<<56).
|
||||
// e.g. 0x0f0f_0f0f_0f0f_0f0f
|
||||
x = uint64(int8(x))
|
||||
default:
|
||||
// e = 4 or 2.
|
||||
return true
|
||||
}
|
||||
return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
|
||||
}
|
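// Added annotation (not part of the upstream file): a few illustrative inputs for the check
// above, assuming 64-bit mode:
//
//	isBitMaskImmediate(0x0000_ffff_0000_0000, true) == true  // one run of 16 ones, rotated
//	isBitMaskImmediate(0x0f0f_0f0f_0f0f_0f0f, true) == true  // element size 8, pattern 0b0000_1111
//	isBitMaskImmediate(0x0000_ff0f_0000_0000, true) == false // two separate runs of ones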
||||
|
||||
// sequenceOfSetbits returns true if the number's binary representation is a single contiguous sequence of set bits (1s).
|
||||
// For example: 0b1110 -> true, 0b1010 -> false
|
||||
func sequenceOfSetbits(x uint64) bool {
|
||||
y := getLowestBit(x)
|
||||
// If x is a sequence of set bits, this should result in the number
|
||||
// with only one set bit (i.e. power of two).
|
||||
y += x
|
||||
return (y-1)&y == 0
|
||||
}
|
||||
|
||||
func getLowestBit(x uint64) uint64 {
|
||||
return x & (^x + 1)
|
||||
}
|
||||
|
||||
// const16bitAligned checks whether the value fits in a single 16-bit aligned halfword.
// If so, it returns the shift amount divided by 16; otherwise it returns -1.
|
||||
func const16bitAligned(v int64) (ret int) {
|
||||
ret = -1
|
||||
for s := 0; s < 64; s += 16 {
|
||||
if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
|
||||
ret = s / 16
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
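// Added annotation (not part of the upstream file): for example, const16bitAligned(0xffff)
// returns 0, const16bitAligned(0xffff_0000) returns 1, and const16bitAligned(0x1_0000_0000)
// returns 2, while const16bitAligned(0x1_ffff) returns -1 since it straddles two halfwords.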
||||
|
||||
// load64bitConst loads a 64-bit constant into the register, following the same logic to decide how to load large 64-bit
|
||||
// consts as in the Go assembler.
|
||||
//
|
||||
// See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759
|
||||
func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {
|
||||
var bits [4]uint64
|
||||
var zeros, negs int
|
||||
for i := 0; i < 4; i++ {
|
||||
bits[i] = uint64(c) >> uint(i*16) & 0xffff
|
||||
if v := bits[i]; v == 0 {
|
||||
zeros++
|
||||
} else if v == 0xffff {
|
||||
negs++
|
||||
}
|
||||
}
|
||||
|
||||
if zeros == 3 {
|
||||
// one MOVZ instruction.
|
||||
for i, v := range bits {
|
||||
if v != 0 {
|
||||
m.insertMOVZ(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
} else if negs == 3 {
|
||||
// one MOVN instruction.
|
||||
for i, v := range bits {
|
||||
if v != 0xffff {
|
||||
v = ^v
|
||||
m.insertMOVN(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
} else if zeros == 2 {
|
||||
// one MOVZ then one MOVK.
|
||||
var movz bool
|
||||
for i, v := range bits {
|
||||
if !movz && v != 0 { // MOVZ.
|
||||
m.insertMOVZ(dst, v, i, true)
|
||||
movz = true
|
||||
} else if v != 0 {
|
||||
m.insertMOVK(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
|
||||
} else if negs == 2 {
|
||||
// one MOVN then one or two MOVK.
|
||||
var movn bool
|
||||
for i, v := range bits { // Emit MOVN.
|
||||
if !movn && v != 0xffff {
|
||||
v = ^v
|
||||
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
|
||||
m.insertMOVN(dst, v, i, true)
|
||||
movn = true
|
||||
} else if v != 0xffff {
|
||||
m.insertMOVK(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
|
||||
} else if zeros == 1 {
|
||||
// one MOVZ then two MOVK.
|
||||
var movz bool
|
||||
for i, v := range bits {
|
||||
if !movz && v != 0 { // MOVZ.
|
||||
m.insertMOVZ(dst, v, i, true)
|
||||
movz = true
|
||||
} else if v != 0 {
|
||||
m.insertMOVK(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
|
||||
} else if negs == 1 {
|
||||
// one MOVN then two MOVK.
|
||||
var movn bool
|
||||
for i, v := range bits { // Emit MOVN.
|
||||
if !movn && v != 0xffff {
|
||||
v = ^v
|
||||
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
|
||||
m.insertMOVN(dst, v, i, true)
|
||||
movn = true
|
||||
} else if v != 0xffff {
|
||||
m.insertMOVK(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// one MOVZ then up to three MOVK.
|
||||
var movz bool
|
||||
for i, v := range bits {
|
||||
if !movz && v != 0 { // MOVZ.
|
||||
m.insertMOVZ(dst, v, i, true)
|
||||
movz = true
|
||||
} else if v != 0 {
|
||||
m.insertMOVK(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
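// Added annotation (not part of the upstream file): a minimal worked example of the
// decomposition above. For c = 0x0000_1234_0000_5678 the 16-bit halfwords are
// [0x5678, 0x0000, 0x1234, 0x0000], so zeros == 2 and the constant is materialized as
//
//	MOVZ dst, #0x5678, LSL #0
//	MOVK dst, #0x1234, LSL #32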
||||
|
||||
func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
|
||||
instr := m.allocateInstr()
|
||||
instr.asMOVZ(dst, v, uint64(shift), dst64)
|
||||
m.insert(instr)
|
||||
}
|
||||
|
||||
func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
|
||||
instr := m.allocateInstr()
|
||||
instr.asMOVK(dst, v, uint64(shift), dst64)
|
||||
m.insert(instr)
|
||||
}
|
||||
|
||||
func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
|
||||
instr := m.allocateInstr()
|
||||
instr.asMOVN(dst, v, uint64(shift), dst64)
|
||||
m.insert(instr)
|
||||
}
|
2221
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
generated
vendored
Normal file
File diff suppressed because it is too large
350
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go
generated
vendored
Normal file
@ -0,0 +1,350 @@
|
||||
package arm64
|
||||
|
||||
// This file contains the logic to "find and determine operands" for instructions.
|
||||
// In order to finalize the form of an operand, we might end up merging/eliminating
|
||||
// the source instructions into an operand whenever possible.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
type (
|
||||
// operand represents an operand of an instruction whose type is determined by the kind.
|
||||
operand struct {
|
||||
kind operandKind
|
||||
data, data2 uint64
|
||||
}
|
||||
operandKind byte
|
||||
)
|
||||
|
||||
// Here's the list of operand kinds. We use the abbreviation of the kind name not only for these consts,
|
||||
// but also names of functions which return the operand of the kind.
|
||||
const (
|
||||
// operandKindNR represents "NormalRegister" (NR). This is literally the register without any special operation unlike others.
|
||||
operandKindNR operandKind = iota
|
||||
// operandKindSR represents "Shifted Register" (SR). This is a register which is shifted by a constant.
|
||||
// Some of the arm64 instructions can take this kind of operand.
|
||||
operandKindSR
|
||||
// operandKindER represents "Extended Register (ER). This is a register which is sign/zero-extended to a larger size.
|
||||
// Some of the arm64 instructions can take this kind of operand.
|
||||
operandKindER
|
||||
// operandKindImm12 represents "Immediate 12" (Imm12). This is a 12-bit immediate value which can be either shifted by 12 or not.
|
||||
// See asImm12 function for detail.
|
||||
operandKindImm12
|
||||
// operandKindShiftImm represents "Shifted Immediate" (ShiftImm) used by shift operations.
|
||||
operandKindShiftImm
|
||||
)
|
||||
|
||||
// format returns the string representation of this operand for debugging, using the given register size in bits.
|
||||
func (o operand) format(size byte) string {
|
||||
switch o.kind {
|
||||
case operandKindNR:
|
||||
return formatVRegSized(o.nr(), size)
|
||||
case operandKindSR:
|
||||
r, amt, sop := o.sr()
|
||||
return fmt.Sprintf("%s, %s #%d", formatVRegSized(r, size), sop, amt)
|
||||
case operandKindER:
|
||||
r, eop, _ := o.er()
|
||||
return fmt.Sprintf("%s %s", formatVRegSized(r, size), eop)
|
||||
case operandKindImm12:
|
||||
imm12, shiftBit := o.imm12()
|
||||
if shiftBit == 1 {
|
||||
return fmt.Sprintf("#%#x", uint64(imm12)<<12)
|
||||
} else {
|
||||
return fmt.Sprintf("#%#x", imm12)
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown operand kind: %d", o.kind))
|
||||
}
|
||||
}
|
||||
|
||||
// operandNR encodes the given VReg as an operand of operandKindNR.
|
||||
func operandNR(r regalloc.VReg) operand {
|
||||
return operand{kind: operandKindNR, data: uint64(r)}
|
||||
}
|
||||
|
||||
// nr decodes the underlying VReg assuming the operand is of operandKindNR.
|
||||
func (o operand) nr() regalloc.VReg {
|
||||
return regalloc.VReg(o.data)
|
||||
}
|
||||
|
||||
// operandER encodes the given VReg as an operand of operandKindER.
|
||||
func operandER(r regalloc.VReg, eop extendOp, to byte) operand {
|
||||
if to < 32 {
|
||||
panic("TODO?BUG?: when we need to extend to less than 32 bits?")
|
||||
}
|
||||
return operand{kind: operandKindER, data: uint64(r), data2: uint64(eop)<<32 | uint64(to)}
|
||||
}
|
||||
|
||||
// er decodes the underlying VReg, extend operation, and the target size assuming the operand is of operandKindER.
|
||||
func (o operand) er() (r regalloc.VReg, eop extendOp, to byte) {
|
||||
return regalloc.VReg(o.data), extendOp(o.data2>>32) & 0xff, byte(o.data2 & 0xff)
|
||||
}
|
||||
|
||||
// operandSR encodes the given VReg as an operand of operandKindSR.
|
||||
func operandSR(r regalloc.VReg, amt byte, sop shiftOp) operand {
|
||||
return operand{kind: operandKindSR, data: uint64(r), data2: uint64(amt)<<32 | uint64(sop)}
|
||||
}
|
||||
|
||||
// sr decodes the underlying VReg, shift amount, and shift operation assuming the operand is of operandKindSR.
|
||||
func (o operand) sr() (r regalloc.VReg, amt byte, sop shiftOp) {
|
||||
return regalloc.VReg(o.data), byte(o.data2>>32) & 0xff, shiftOp(o.data2) & 0xff
|
||||
}
|
||||
|
||||
// operandImm12 encodes the given imm12 as an operand of operandKindImm12.
|
||||
func operandImm12(imm12 uint16, shiftBit byte) operand {
|
||||
return operand{kind: operandKindImm12, data: uint64(imm12) | uint64(shiftBit)<<32}
|
||||
}
|
||||
|
||||
// imm12 decodes the underlying imm12 data assuming the operand is of operandKindImm12.
|
||||
func (o operand) imm12() (v uint16, shiftBit byte) {
|
||||
return uint16(o.data), byte(o.data >> 32)
|
||||
}
|
||||
|
||||
// operandShiftImm encodes the given amount as an operand of operandKindShiftImm.
|
||||
func operandShiftImm(amount byte) operand {
|
||||
return operand{kind: operandKindShiftImm, data: uint64(amount)}
|
||||
}
|
||||
|
||||
// shiftImm decodes the underlying shift amount data assuming the operand is of operandKindShiftImm.
|
||||
func (o operand) shiftImm() byte {
|
||||
return byte(o.data)
|
||||
}
|
||||
|
||||
// reg returns the register of the operand if applicable.
|
||||
func (o operand) reg() regalloc.VReg {
|
||||
switch o.kind {
|
||||
case operandKindNR:
|
||||
return o.nr()
|
||||
case operandKindSR:
|
||||
r, _, _ := o.sr()
|
||||
return r
|
||||
case operandKindER:
|
||||
r, _, _ := o.er()
|
||||
return r
|
||||
case operandKindImm12:
|
||||
// Does not have a register.
|
||||
case operandKindShiftImm:
|
||||
// Does not have a register.
|
||||
default:
|
||||
panic(o.kind)
|
||||
}
|
||||
return regalloc.VRegInvalid
|
||||
}
|
||||
|
||||
func (o operand) realReg() regalloc.RealReg {
|
||||
return o.nr().RealReg()
|
||||
}
|
||||
|
||||
func (o operand) assignReg(v regalloc.VReg) operand {
|
||||
switch o.kind {
|
||||
case operandKindNR:
|
||||
return operandNR(v)
|
||||
case operandKindSR:
|
||||
_, amt, sop := o.sr()
|
||||
return operandSR(v, amt, sop)
|
||||
case operandKindER:
|
||||
_, eop, to := o.er()
|
||||
return operandER(v, eop, to)
|
||||
case operandKindImm12:
|
||||
// Does not have a register.
|
||||
case operandKindShiftImm:
|
||||
// Does not have a register.
|
||||
}
|
||||
panic(o.kind)
|
||||
}
|
||||
|
||||
// getOperand_Imm12_ER_SR_NR returns an operand of either operandKindImm12, operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def`).
|
||||
//
|
||||
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
|
||||
// If the operand can be expressed as operandKindImm12, `mode` is ignored.
|
||||
func (m *machine) getOperand_Imm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return operandNR(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
instr := def.Instr
|
||||
if instr.Opcode() == ssa.OpcodeIconst {
|
||||
if imm12Op, ok := asImm12Operand(instr.ConstantVal()); ok {
|
||||
instr.MarkLowered()
|
||||
return imm12Op
|
||||
}
|
||||
}
|
||||
return m.getOperand_ER_SR_NR(def, mode)
|
||||
}
|
||||
|
||||
// getOperand_MaybeNegatedImm12_ER_SR_NR is almost the same as getOperand_Imm12_ER_SR_NR, but this might negate the immediate value.
|
||||
// If the immediate value is negated, the second return value is true, otherwise always false.
|
||||
func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) {
|
||||
if def.IsFromBlockParam() {
|
||||
return operandNR(def.BlkParamVReg), false
|
||||
}
|
||||
|
||||
instr := def.Instr
|
||||
if instr.Opcode() == ssa.OpcodeIconst {
|
||||
c := instr.ConstantVal()
|
||||
if imm12Op, ok := asImm12Operand(c); ok {
|
||||
instr.MarkLowered()
|
||||
return imm12Op, false
|
||||
}
|
||||
|
||||
signExtended := int64(c)
|
||||
if def.SSAValue().Type().Bits() == 32 {
|
||||
signExtended = (signExtended << 32) >> 32
|
||||
}
|
||||
negatedWithoutSign := -signExtended
|
||||
if imm12Op, ok := asImm12Operand(uint64(negatedWithoutSign)); ok {
|
||||
instr.MarkLowered()
|
||||
return imm12Op, true
|
||||
}
|
||||
}
|
||||
return m.getOperand_ER_SR_NR(def, mode), false
|
||||
}
|
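// Added annotation (not part of the upstream file): for example, an ssa.OpcodeIconst of -8
// (i64) is not representable as imm12, but its negation 8 is, so this returns
// (operandImm12(8, 0), true), presumably letting the caller flip an add into a sub (or vice
// versa) instead of materializing the constant in a register.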
||||
|
||||
// getOperand_ER_SR_NR returns an operand of either operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def`).
|
||||
//
|
||||
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
|
||||
func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return operandNR(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
if m.compiler.MatchInstr(def, ssa.OpcodeSExtend) || m.compiler.MatchInstr(def, ssa.OpcodeUExtend) {
|
||||
extInstr := def.Instr
|
||||
|
||||
signed := extInstr.Opcode() == ssa.OpcodeSExtend
|
||||
innerExtFromBits, innerExtToBits := extInstr.ExtendFromToBits()
|
||||
modeBits, modeSigned := mode.bits(), mode.signed()
|
||||
if mode == extModeNone || innerExtToBits == modeBits {
|
||||
eop := extendOpFrom(signed, innerExtFromBits)
|
||||
extArg := m.getOperand_NR(m.compiler.ValueDefinition(extInstr.Arg()), extModeNone)
|
||||
op = operandER(extArg.nr(), eop, innerExtToBits)
|
||||
extInstr.MarkLowered()
|
||||
return
|
||||
}
|
||||
|
||||
if innerExtToBits > modeBits {
|
||||
panic("BUG?TODO?: need the results of inner extension to be larger than the mode")
|
||||
}
|
||||
|
||||
switch {
|
||||
case (!signed && !modeSigned) || (signed && modeSigned):
|
||||
// Two sign/zero extensions are equivalent to one sign/zero extension for the larger size.
|
||||
eop := extendOpFrom(modeSigned, innerExtFromBits)
|
||||
op = operandER(m.compiler.VRegOf(extInstr.Arg()), eop, modeBits)
|
||||
extInstr.MarkLowered()
|
||||
case (signed && !modeSigned) || (!signed && modeSigned):
|
||||
// We need to {sign, zero}-extend the result of the {zero,sign} extension.
|
||||
eop := extendOpFrom(modeSigned, innerExtToBits)
|
||||
op = operandER(m.compiler.VRegOf(extInstr.Return()), eop, modeBits)
|
||||
// Note that we fail to merge the inner extension instruction in this case.
|
||||
}
|
||||
return
|
||||
}
|
||||
return m.getOperand_SR_NR(def, mode)
|
||||
}
|
||||
|
||||
// getOperand_SR_NR returns an operand of either operandKindSR or operandKindNR from the given value (defined by `def`).
|
||||
//
|
||||
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
|
||||
func (m *machine) getOperand_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return operandNR(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
if m.compiler.MatchInstr(def, ssa.OpcodeIshl) {
|
||||
// Check if the shift amount is a constant instruction.
|
||||
targetVal, amountVal := def.Instr.Arg2()
|
||||
targetVReg := m.getOperand_NR(m.compiler.ValueDefinition(targetVal), extModeNone).nr()
|
||||
amountDef := m.compiler.ValueDefinition(amountVal)
|
||||
if amountDef.IsFromInstr() && amountDef.Instr.Constant() {
|
||||
// If that is the case, we can use the shifted register operand (SR).
|
||||
c := byte(amountDef.Instr.ConstantVal()) & (targetVal.Type().Bits() - 1) // Clears the unnecessary bits.
|
||||
def.Instr.MarkLowered()
|
||||
amountDef.Instr.MarkLowered()
|
||||
return operandSR(targetVReg, c, shiftOpLSL)
|
||||
}
|
||||
}
|
||||
return m.getOperand_NR(def, mode)
|
||||
}
|
||||
|
||||
// getOperand_ShiftImm_NR returns an operand of either operandKindShiftImm or operandKindNR from the given value (defined by `def`).
|
||||
func (m *machine) getOperand_ShiftImm_NR(def *backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return operandNR(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
instr := def.Instr
|
||||
if instr.Constant() {
|
||||
amount := byte(instr.ConstantVal()) & (shiftBitWidth - 1) // Clears the unnecessary bits.
|
||||
return operandShiftImm(amount)
|
||||
}
|
||||
return m.getOperand_NR(def, mode)
|
||||
}
|
||||
|
||||
// getOperand_NR returns an operand of operandKindNR from the given value (defined by `def`).
|
||||
//
|
||||
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
|
||||
func (m *machine) getOperand_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
|
||||
var v regalloc.VReg
|
||||
if def.IsFromBlockParam() {
|
||||
v = def.BlkParamVReg
|
||||
} else {
|
||||
instr := def.Instr
|
||||
if instr.Constant() {
|
||||
// We inline all the constant instructions so that we can reduce register usage.
|
||||
v = m.lowerConstant(instr)
|
||||
instr.MarkLowered()
|
||||
} else {
|
||||
if n := def.N; n == 0 {
|
||||
v = m.compiler.VRegOf(instr.Return())
|
||||
} else {
|
||||
_, rs := instr.Returns()
|
||||
v = m.compiler.VRegOf(rs[n-1])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
r := v
|
||||
switch inBits := def.SSAValue().Type().Bits(); {
|
||||
case mode == extModeNone:
|
||||
case inBits == 32 && (mode == extModeZeroExtend32 || mode == extModeSignExtend32):
|
||||
case inBits == 32 && mode == extModeZeroExtend64:
|
||||
extended := m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
ext := m.allocateInstr()
|
||||
ext.asExtend(extended, v, 32, 64, false)
|
||||
m.insert(ext)
|
||||
r = extended
|
||||
case inBits == 32 && mode == extModeSignExtend64:
|
||||
extended := m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
ext := m.allocateInstr()
|
||||
ext.asExtend(extended, v, 32, 64, true)
|
||||
m.insert(ext)
|
||||
r = extended
|
||||
case inBits == 64 && (mode == extModeZeroExtend64 || mode == extModeSignExtend64):
|
||||
}
|
||||
return operandNR(r)
|
||||
}
|
||||
|
||||
func asImm12Operand(val uint64) (op operand, ok bool) {
|
||||
v, shiftBit, ok := asImm12(val)
|
||||
if !ok {
|
||||
return operand{}, false
|
||||
}
|
||||
return operandImm12(v, shiftBit), true
|
||||
}
|
||||
|
||||
func asImm12(val uint64) (v uint16, shiftBit byte, ok bool) {
|
||||
const mask1, mask2 uint64 = 0xfff, 0xfff_000
|
||||
if val&^mask1 == 0 {
|
||||
return uint16(val), 0, true
|
||||
} else if val&^mask2 == 0 {
|
||||
return uint16(val >> 12), 1, true
|
||||
} else {
|
||||
return 0, 0, false
|
||||
}
|
||||
}
|
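// Added annotation (not part of the upstream file): asImm12 accepts either a plain 12-bit
// value or a 12-bit value shifted left by 12. For example, 0xfff yields (0xfff, 0, true),
// 0x1000 yields (0x1, 1, true), and 0x1001 yields (0, 0, false) since it needs bits from
// both ranges.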
440
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go
generated
vendored
Normal file
@ -0,0 +1,440 @@
|
||||
package arm64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
type (
|
||||
// addressMode represents an ARM64 addressing mode.
|
||||
//
|
||||
// https://developer.arm.com/documentation/102374/0101/Loads-and-stores---addressing
|
||||
// TODO: use the bit-packed layout like operand struct.
|
||||
addressMode struct {
|
||||
kind addressModeKind
|
||||
rn, rm regalloc.VReg
|
||||
extOp extendOp
|
||||
imm int64
|
||||
}
|
||||
|
||||
// addressModeKind represents the kind of ARM64 addressing mode.
|
||||
addressModeKind byte
|
||||
)
|
||||
|
||||
const (
|
||||
// addressModeKindRegExtended takes a base register and an index register. The index register is sign/zero-extended,
|
||||
// and then scaled by bits(type)/8.
|
||||
//
|
||||
// e.g.
|
||||
// - ldrh w1, [x2, w3, SXTW #1] ;; sign-extended and scaled by 2 (== LSL #1)
|
||||
// - strh w1, [x2, w3, UXTW #1] ;; zero-extended and scaled by 2 (== LSL #1)
|
||||
// - ldr w1, [x2, w3, SXTW #2] ;; sign-extended and scaled by 4 (== LSL #2)
|
||||
// - str x1, [x2, w3, UXTW #3] ;; zero-extended and scaled by 8 (== LSL #3)
|
||||
//
|
||||
// See the following pages:
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--register---Load-Register-Halfword--register--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--
|
||||
addressModeKindRegScaledExtended addressModeKind = iota
|
||||
|
||||
// addressModeKindRegScaled is the same as addressModeKindRegScaledExtended, but without the extension.
|
||||
addressModeKindRegScaled
|
||||
|
||||
// addressModeKindRegExtended is the same as addressModeKindRegScaledExtended, but without the scale factor.
|
||||
addressModeKindRegExtended
|
||||
|
||||
// addressModeKindRegReg takes a base register and an index register. The index register is neither scaled nor extended.
|
||||
addressModeKindRegReg
|
||||
|
||||
// addressModeKindRegSignedImm9 takes a base register and a 9-bit "signed" immediate offset (-256 to 255).
|
||||
// The immediate will be sign-extended, and be added to the base register.
|
||||
// This is a.k.a. "unscaled" since the immediate is not scaled.
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--
|
||||
addressModeKindRegSignedImm9
|
||||
|
||||
// addressModeKindRegUnsignedImm12 takes a base register and a 12-bit "unsigned" immediate offset. scaled by
|
||||
// the size of the type. In other words, the actual offset will be imm12 * bits(type)/8.
|
||||
// See "Unsigned offset" in the following pages:
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
|
||||
addressModeKindRegUnsignedImm12
|
||||
|
||||
// addressModeKindPostIndex takes a base register and a 9-bit "signed" immediate offset.
|
||||
// After the load/store, the base register will be updated by the offset.
|
||||
//
|
||||
// Note that when this is used for pair load/store, the offset will be 7-bit "signed" immediate offset.
|
||||
//
|
||||
// See "Post-index" in the following pages for examples:
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-
|
||||
addressModeKindPostIndex
|
||||
|
||||
// addressModeKindPreIndex takes a base register and a 9-bit "signed" immediate offset.
|
||||
// Before the load/store, the base register will be updated by the offset.
|
||||
//
|
||||
// Note that when this is used for pair load/store, the offset will be 7-bit "signed" immediate offset.
|
||||
//
|
||||
// See "Pre-index" in the following pages for examples:
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-
|
||||
addressModeKindPreIndex
|
||||
|
||||
// addressModeKindArgStackSpace is used to resolve the address of the argument stack space
|
||||
// existing right above the stack pointer. Since we don't know the exact stack space needed for a function
|
||||
// at a compilation phase, this is used as a placeholder and further lowered to a real addressing mode like above.
|
||||
addressModeKindArgStackSpace
|
||||
|
||||
// addressModeKindResultStackSpace is used to resolve the address of the result stack space
|
||||
// existing right above the stack pointer. Since we don't know the exact stack space needed for a function
|
||||
// at a compilation phase, this is used as a placeholder and further lowered to a real addressing mode like above.
|
||||
addressModeKindResultStackSpace
|
||||
)
|
||||
|
||||
func (a addressMode) format(dstSizeBits byte) (ret string) {
|
||||
base := formatVRegSized(a.rn, 64)
|
||||
if rn := a.rn; rn.RegType() != regalloc.RegTypeInt {
|
||||
panic("invalid base register type: " + a.rn.RegType().String())
|
||||
} else if rn.IsRealReg() && v0 <= a.rn.RealReg() && a.rn.RealReg() <= v30 {
|
||||
panic("BUG: likely a bug in reg alloc or reset behavior")
|
||||
}
|
||||
|
||||
switch a.kind {
|
||||
case addressModeKindRegScaledExtended:
|
||||
amount := a.sizeInBitsToShiftAmount(dstSizeBits)
|
||||
ret = fmt.Sprintf("[%s, %s, %s #%#x]", base, formatVRegSized(a.rm, a.indexRegBits()), a.extOp, amount)
|
||||
case addressModeKindRegScaled:
|
||||
amount := a.sizeInBitsToShiftAmount(dstSizeBits)
|
||||
ret = fmt.Sprintf("[%s, %s, lsl #%#x]", base, formatVRegSized(a.rm, a.indexRegBits()), amount)
|
||||
case addressModeKindRegExtended:
|
||||
ret = fmt.Sprintf("[%s, %s, %s]", base, formatVRegSized(a.rm, a.indexRegBits()), a.extOp)
|
||||
case addressModeKindRegReg:
|
||||
ret = fmt.Sprintf("[%s, %s]", base, formatVRegSized(a.rm, a.indexRegBits()))
|
||||
case addressModeKindRegSignedImm9:
|
||||
if a.imm != 0 {
|
||||
ret = fmt.Sprintf("[%s, #%#x]", base, a.imm)
|
||||
} else {
|
||||
ret = fmt.Sprintf("[%s]", base)
|
||||
}
|
||||
case addressModeKindRegUnsignedImm12:
|
||||
if a.imm != 0 {
|
||||
ret = fmt.Sprintf("[%s, #%#x]", base, a.imm)
|
||||
} else {
|
||||
ret = fmt.Sprintf("[%s]", base)
|
||||
}
|
||||
case addressModeKindPostIndex:
|
||||
ret = fmt.Sprintf("[%s], #%#x", base, a.imm)
|
||||
case addressModeKindPreIndex:
|
||||
ret = fmt.Sprintf("[%s, #%#x]!", base, a.imm)
|
||||
case addressModeKindArgStackSpace:
|
||||
ret = fmt.Sprintf("[#arg_space, #%#x]", a.imm)
|
||||
case addressModeKindResultStackSpace:
|
||||
ret = fmt.Sprintf("[#ret_space, #%#x]", a.imm)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func addressModePreOrPostIndex(rn regalloc.VReg, imm int64, preIndex bool) addressMode {
|
||||
if !offsetFitsInAddressModeKindRegSignedImm9(imm) {
|
||||
panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm))
|
||||
}
|
||||
if preIndex {
|
||||
return addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
|
||||
} else {
|
||||
return addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
|
||||
}
|
||||
}
|
||||
|
||||
func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool {
|
||||
divisor := int64(dstSizeInBits) / 8
|
||||
return 0 < offset && offset%divisor == 0 && offset/divisor < 4096
|
||||
}
|
||||
|
||||
func offsetFitsInAddressModeKindRegSignedImm9(offset int64) bool {
|
||||
return -256 <= offset && offset <= 255
|
||||
}
|
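// Added annotation (not part of the upstream file): for a 64-bit access (dstSizeInBits == 64),
// the unsigned-imm12 form covers positive offsets that are multiples of 8 up to 8*4095 = 32760,
// while the signed-imm9 form covers any offset in [-256, 255], e.g. an unaligned offset of 12
// or a negative offset of -8.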
||||
|
||||
func (a addressMode) indexRegBits() byte {
|
||||
bits := a.extOp.srcBits()
|
||||
if bits != 32 && bits != 64 {
|
||||
panic("invalid index register for address mode. it must be either 32 or 64 bits")
|
||||
}
|
||||
return bits
|
||||
}
|
||||
|
||||
func (a addressMode) sizeInBitsToShiftAmount(sizeInBits byte) (lsl byte) {
|
||||
switch sizeInBits {
|
||||
case 8:
|
||||
lsl = 0
|
||||
case 16:
|
||||
lsl = 1
|
||||
case 32:
|
||||
lsl = 2
|
||||
case 64:
|
||||
lsl = 3
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func extLoadSignSize(op ssa.Opcode) (size byte, signed bool) {
|
||||
switch op {
|
||||
case ssa.OpcodeUload8:
|
||||
size, signed = 8, false
|
||||
case ssa.OpcodeUload16:
|
||||
size, signed = 16, false
|
||||
case ssa.OpcodeUload32:
|
||||
size, signed = 32, false
|
||||
case ssa.OpcodeSload8:
|
||||
size, signed = 8, true
|
||||
case ssa.OpcodeSload16:
|
||||
size, signed = 16, true
|
||||
case ssa.OpcodeSload32:
|
||||
size, signed = 32, true
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret regalloc.VReg) {
|
||||
size, signed := extLoadSignSize(op)
|
||||
amode := m.lowerToAddressMode(ptr, offset, size)
|
||||
load := m.allocateInstr()
|
||||
if signed {
|
||||
load.asSLoad(operandNR(ret), amode, size)
|
||||
} else {
|
||||
load.asULoad(operandNR(ret), amode, size)
|
||||
}
|
||||
m.insert(load)
|
||||
}
|
||||
|
||||
func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa.Value) {
|
||||
amode := m.lowerToAddressMode(ptr, offset, typ.Bits())
|
||||
|
||||
dst := m.compiler.VRegOf(ret)
|
||||
load := m.allocateInstr()
|
||||
switch typ {
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
load.asULoad(operandNR(dst), amode, typ.Bits())
|
||||
case ssa.TypeF32, ssa.TypeF64:
|
||||
load.asFpuLoad(operandNR(dst), amode, typ.Bits())
|
||||
case ssa.TypeV128:
|
||||
load.asFpuLoad(operandNR(dst), amode, 128)
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
m.insert(load)
|
||||
}
|
||||
|
||||
func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane, ret ssa.Value) {
|
||||
// vecLoad1R has offset address mode (base+imm) only for post index, so we simply add the offset to the base.
|
||||
base := m.getOperand_NR(m.compiler.ValueDefinition(ptr), extModeNone).nr()
|
||||
offsetReg := m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerConstantI64(offsetReg, int64(offset))
|
||||
addedBase := m.addReg64ToReg64(base, offsetReg)
|
||||
|
||||
rd := operandNR(m.compiler.VRegOf(ret))
|
||||
|
||||
ld1r := m.allocateInstr()
|
||||
ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane))
|
||||
m.insert(ld1r)
|
||||
}
|
||||
|
||||
func (m *machine) lowerStore(si *ssa.Instruction) {
|
||||
// TODO: merge consecutive stores into a single pair store instruction.
|
||||
value, ptr, offset, storeSizeInBits := si.StoreData()
|
||||
amode := m.lowerToAddressMode(ptr, offset, storeSizeInBits)
|
||||
|
||||
valueOp := m.getOperand_NR(m.compiler.ValueDefinition(value), extModeNone)
|
||||
store := m.allocateInstr()
|
||||
store.asStore(valueOp, amode, storeSizeInBits)
|
||||
m.insert(store)
|
||||
}
|
||||
|
||||
// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
|
||||
func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode addressMode) {
|
||||
// TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and
|
||||
// addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed
|
||||
// to support more efficient address resolution.
|
||||
|
||||
a32s, a64s, offset := m.collectAddends(ptr)
|
||||
offset += int64(offsetBase)
|
||||
return m.lowerToAddressModeFromAddends(a32s, a64s, size, offset)
|
||||
}
|
||||
|
||||
// lowerToAddressModeFromAddends creates an addressMode from a list of addends collected by collectAddends.
|
||||
// During the construction, this might emit additional instructions.
|
||||
//
|
||||
// Extracted as a separate function for easy testing.
|
||||
func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode addressMode) {
|
||||
switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); {
|
||||
case a64sExist && a32sExist:
|
||||
var base regalloc.VReg
|
||||
base = a64s.Dequeue()
|
||||
var a32 addend32
|
||||
a32 = a32s.Dequeue()
|
||||
amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
|
||||
case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset):
|
||||
var base regalloc.VReg
|
||||
base = a64s.Dequeue()
|
||||
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
|
||||
offset = 0
|
||||
case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset):
|
||||
var base regalloc.VReg
|
||||
base = a64s.Dequeue()
|
||||
amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
|
||||
offset = 0
|
||||
case a64sExist:
|
||||
var base regalloc.VReg
|
||||
base = a64s.Dequeue()
|
||||
if !a64s.Empty() {
|
||||
index := a64s.Dequeue()
|
||||
amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
|
||||
} else {
|
||||
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
|
||||
}
|
||||
case a32sExist:
|
||||
base32 := a32s.Dequeue()
|
||||
|
||||
// First we need 64-bit base.
|
||||
base := m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
baseExt := m.allocateInstr()
|
||||
var signed bool
|
||||
if base32.ext == extendOpSXTW {
|
||||
signed = true
|
||||
}
|
||||
baseExt.asExtend(base, base32.r, 32, 64, signed)
|
||||
m.insert(baseExt)
|
||||
|
||||
if !a32s.Empty() {
|
||||
index := a32s.Dequeue()
|
||||
amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
|
||||
} else {
|
||||
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
|
||||
}
|
||||
default: // Only static offsets.
|
||||
tmpReg := m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerConstantI64(tmpReg, offset)
|
||||
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0}
|
||||
offset = 0
|
||||
}
|
||||
|
||||
baseReg := amode.rn
|
||||
if offset > 0 {
|
||||
baseReg = m.addConstToReg64(baseReg, offset) // baseReg += offset
|
||||
}
|
||||
|
||||
for !a64s.Empty() {
|
||||
a64 := a64s.Dequeue()
|
||||
baseReg = m.addReg64ToReg64(baseReg, a64) // baseReg += a64
|
||||
}
|
||||
|
||||
for !a32s.Empty() {
|
||||
a32 := a32s.Dequeue()
|
||||
baseReg = m.addRegToReg64Ext(baseReg, a32.r, a32.ext) // baseReg += (a32 extended to 64-bit)
|
||||
}
|
||||
amode.rn = baseReg
|
||||
return
|
||||
}
|
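// Added annotation (not part of the upstream file): a sketch of one resolution. With
// a64s = [x1], a32s = [(w2, SXTW)], offset = 16 and a 64-bit access, the first case picks
// addressModeKindRegExtended{rn: x1, rm: w2, SXTW}; the leftover offset 16 is then folded
// into a fresh base register via addConstToReg64, which becomes the final amode.rn.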
||||
|
||||
var addendsMatchOpcodes = [4]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst}
|
||||
|
||||
func (m *machine) collectAddends(ptr ssa.Value) (addends32 *wazevoapi.Queue[addend32], addends64 *wazevoapi.Queue[regalloc.VReg], offset int64) {
|
||||
m.addendsWorkQueue.Reset()
|
||||
m.addends32.Reset()
|
||||
m.addends64.Reset()
|
||||
m.addendsWorkQueue.Enqueue(ptr)
|
||||
|
||||
for !m.addendsWorkQueue.Empty() {
|
||||
v := m.addendsWorkQueue.Dequeue()
|
||||
|
||||
def := m.compiler.ValueDefinition(v)
|
||||
switch op := m.compiler.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op {
|
||||
case ssa.OpcodeIadd:
|
||||
// If the addend is an add, we recursively collect its operands.
|
||||
x, y := def.Instr.Arg2()
|
||||
m.addendsWorkQueue.Enqueue(x)
|
||||
m.addendsWorkQueue.Enqueue(y)
|
||||
def.Instr.MarkLowered()
|
||||
case ssa.OpcodeIconst:
|
||||
// If the addend is constant, we just statically merge it into the offset.
|
||||
ic := def.Instr
|
||||
u64 := ic.ConstantVal()
|
||||
if ic.Return().Type().Bits() == 32 {
|
||||
offset += int64(int32(u64)) // sign-extend.
|
||||
} else {
|
||||
offset += int64(u64)
|
||||
}
|
||||
def.Instr.MarkLowered()
|
||||
case ssa.OpcodeUExtend, ssa.OpcodeSExtend:
|
||||
input := def.Instr.Arg()
|
||||
if input.Type().Bits() != 32 {
|
||||
panic("illegal size: " + input.Type().String())
|
||||
}
|
||||
|
||||
var ext extendOp
|
||||
if op == ssa.OpcodeUExtend {
|
||||
ext = extendOpUXTW
|
||||
} else {
|
||||
ext = extendOpSXTW
|
||||
}
|
||||
|
||||
inputDef := m.compiler.ValueDefinition(input)
|
||||
constInst := inputDef.IsFromInstr() && inputDef.Instr.Constant()
|
||||
switch {
|
||||
case constInst && ext == extendOpUXTW:
|
||||
// Zero-extension of a 32-bit constant can be merged into the offset.
|
||||
offset += int64(uint32(inputDef.Instr.ConstantVal()))
|
||||
case constInst && ext == extendOpSXTW:
|
||||
// Sign-extension of a 32-bit constant can be merged into the offset.
|
||||
offset += int64(int32(inputDef.Instr.ConstantVal())) // sign-extend!
|
||||
default:
|
||||
m.addends32.Enqueue(addend32{r: m.getOperand_NR(inputDef, extModeNone).nr(), ext: ext})
|
||||
}
|
||||
def.Instr.MarkLowered()
|
||||
continue
|
||||
default:
|
||||
// If the addend is not one of them, we simply use it as-is (without merging!), optionally zero-extending it.
|
||||
m.addends64.Enqueue(m.getOperand_NR(def, extModeZeroExtend64 /* optional zero ext */).nr())
|
||||
}
|
||||
}
|
||||
return &m.addends32, &m.addends64, offset
|
||||
}
|
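// Added annotation (not part of the upstream file): for a pointer built as
// Iadd(Iadd(p, Iconst 8), UExtend(i)) with a 32-bit i, this walk yields
// a64s = [p], a32s = [(i, UXTW)] and offset = 8, which lowerToAddressModeFromAddends above
// then folds into an addressing mode, emitting an add only for the leftover offset.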
||||
|
||||
func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
|
||||
rd = m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
alu := m.allocateInstr()
|
||||
if imm12Op, ok := asImm12Operand(uint64(c)); ok {
|
||||
alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), imm12Op, true)
|
||||
} else if imm12Op, ok = asImm12Operand(uint64(-c)); ok {
|
||||
alu.asALU(aluOpSub, operandNR(rd), operandNR(r), imm12Op, true)
|
||||
} else {
|
||||
tmp := m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
m.load64bitConst(c, tmp)
|
||||
alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), operandNR(tmp), true)
|
||||
}
|
||||
m.insert(alu)
|
||||
return
|
||||
}
|
||||
|
||||
func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {
|
||||
rd = m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
alu := m.allocateInstr()
|
||||
alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true)
|
||||
m.insert(alu)
|
||||
return
|
||||
}
|
||||
|
||||
func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) {
|
||||
rd = m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
alu := m.allocateInstr()
|
||||
alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandER(rm, ext, 64), true)
|
||||
m.insert(alu)
|
||||
return
|
||||
}
|
515
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go
generated
vendored
Normal file
@ -0,0 +1,515 @@
|
||||
package arm64
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
type (
|
||||
// machine implements backend.Machine.
|
||||
machine struct {
|
||||
compiler backend.Compiler
|
||||
executableContext *backend.ExecutableContextT[instruction]
|
||||
currentABI *backend.FunctionABI
|
||||
|
||||
regAlloc regalloc.Allocator
|
||||
regAllocFn *backend.RegAllocFunction[*instruction, *machine]
|
||||
|
||||
// addendsWorkQueue is used during address lowering, defined here for reuse.
|
||||
addendsWorkQueue wazevoapi.Queue[ssa.Value]
|
||||
addends32 wazevoapi.Queue[addend32]
|
||||
// addends64 is used during address lowering, defined here for reuse.
|
||||
addends64 wazevoapi.Queue[regalloc.VReg]
|
||||
unresolvedAddressModes []*instruction
|
||||
|
||||
// condBrRelocs holds the conditional branches which need offset relocation.
|
||||
condBrRelocs []condBrReloc
|
||||
|
||||
// jmpTableTargets holds the labels of the jump table targets.
|
||||
jmpTableTargets [][]uint32
|
||||
|
||||
// spillSlotSize is the size of the stack slot in bytes used for spilling registers.
|
||||
// During the execution of the function, the stack looks like:
|
||||
//
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | xxxxx |
|
||||
// | ReturnAddress |
|
||||
// +-----------------+ <<-|
|
||||
// | ........... | |
|
||||
// | spill slot M | | <--- spillSlotSize
|
||||
// | ............ | |
|
||||
// | spill slot 2 | |
|
||||
// | spill slot 1 | <<-+
|
||||
// | clobbered N |
|
||||
// | ........... |
|
||||
// | clobbered 1 |
|
||||
// | clobbered 0 |
|
||||
// SP---> +-----------------+
|
||||
// (low address)
|
||||
//
|
||||
// and it represents the size of the space between FP and the first spilled slot. This must be a multiple of 16.
|
||||
// Also note that this is only known after register allocation.
|
||||
spillSlotSize int64
|
||||
spillSlots map[regalloc.VRegID]int64 // regalloc.VRegID to offset.
|
||||
// clobberedRegs holds real-register backed VRegs saved at the function prologue, and restored at the epilogue.
|
||||
clobberedRegs []regalloc.VReg
|
||||
|
||||
maxRequiredStackSizeForCalls int64
|
||||
stackBoundsCheckDisabled bool
|
||||
|
||||
regAllocStarted bool
|
||||
}
|
||||
|
||||
addend32 struct {
|
||||
r regalloc.VReg
|
||||
ext extendOp
|
||||
}
|
||||
|
||||
condBrReloc struct {
|
||||
cbr *instruction
|
||||
// currentLabelPos is the labelPosition within which condBr is defined.
|
||||
currentLabelPos *labelPosition
|
||||
// Next block's labelPosition.
|
||||
nextLabel label
|
||||
offset int64
|
||||
}
|
||||
|
||||
labelPosition = backend.LabelPosition[instruction]
|
||||
label = backend.Label
|
||||
)
|
||||
|
||||
const (
|
||||
labelReturn = backend.LabelReturn
|
||||
labelInvalid = backend.LabelInvalid
|
||||
)
|
||||
|
||||
// NewBackend returns a new backend for arm64.
|
||||
func NewBackend() backend.Machine {
|
||||
m := &machine{
|
||||
spillSlots: make(map[regalloc.VRegID]int64),
|
||||
executableContext: newExecutableContext(),
|
||||
regAlloc: regalloc.NewAllocator(regInfo),
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func newExecutableContext() *backend.ExecutableContextT[instruction] {
|
||||
return backend.NewExecutableContextT[instruction](resetInstruction, setNext, setPrev, asNop0)
|
||||
}
|
||||
|
||||
// ExecutableContext implements backend.Machine.
|
||||
func (m *machine) ExecutableContext() backend.ExecutableContext {
|
||||
return m.executableContext
|
||||
}
|
||||
|
||||
// RegAlloc implements backend.Machine Function.
|
||||
func (m *machine) RegAlloc() {
|
||||
rf := m.regAllocFn
|
||||
for _, pos := range m.executableContext.OrderedBlockLabels {
|
||||
rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End)
|
||||
}
|
||||
|
||||
m.regAllocStarted = true
|
||||
m.regAlloc.DoAllocation(rf)
|
||||
// Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes.
|
||||
m.spillSlotSize = (m.spillSlotSize + 15) &^ 15
|
||||
}
|
||||
|
||||
// Reset implements backend.Machine.
|
||||
func (m *machine) Reset() {
|
||||
m.clobberedRegs = m.clobberedRegs[:0]
|
||||
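// Added annotation (not part of the upstream file): clobberedRegs is reused here as a
// scratch buffer to collect the spillSlots keys before deleting them, avoiding a fresh
// allocation on every Reset; it is truncated again right after.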
for key := range m.spillSlots {
|
||||
m.clobberedRegs = append(m.clobberedRegs, regalloc.VReg(key))
|
||||
}
|
||||
for _, key := range m.clobberedRegs {
|
||||
delete(m.spillSlots, regalloc.VRegID(key))
|
||||
}
|
||||
m.clobberedRegs = m.clobberedRegs[:0]
|
||||
m.regAllocStarted = false
|
||||
m.regAlloc.Reset()
|
||||
m.regAllocFn.Reset()
|
||||
m.spillSlotSize = 0
|
||||
m.unresolvedAddressModes = m.unresolvedAddressModes[:0]
|
||||
m.maxRequiredStackSizeForCalls = 0
|
||||
m.executableContext.Reset()
|
||||
m.jmpTableTargets = m.jmpTableTargets[:0]
|
||||
}
|
||||
|
||||
// SetCurrentABI implements backend.Machine SetCurrentABI.
|
||||
func (m *machine) SetCurrentABI(abi *backend.FunctionABI) {
|
||||
m.currentABI = abi
|
||||
}
|
||||
|
||||
// DisableStackCheck implements backend.Machine DisableStackCheck.
|
||||
func (m *machine) DisableStackCheck() {
|
||||
m.stackBoundsCheckDisabled = true
|
||||
}
|
||||
|
||||
// SetCompiler implements backend.Machine.
|
||||
func (m *machine) SetCompiler(ctx backend.Compiler) {
|
||||
m.compiler = ctx
|
||||
m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, ctx.SSABuilder(), ctx)
|
||||
}
|
||||
|
||||
func (m *machine) insert(i *instruction) {
|
||||
ectx := m.executableContext
|
||||
ectx.PendingInstructions = append(ectx.PendingInstructions, i)
|
||||
}
|
||||
|
||||
func (m *machine) insertBrTargetLabel() label {
|
||||
nop, l := m.allocateBrTarget()
|
||||
m.insert(nop)
|
||||
return l
|
||||
}
|
||||
|
||||
func (m *machine) allocateBrTarget() (nop *instruction, l label) {
|
||||
ectx := m.executableContext
|
||||
l = ectx.AllocateLabel()
|
||||
nop = m.allocateInstr()
|
||||
nop.asNop0WithLabel(l)
|
||||
pos := ectx.AllocateLabelPosition(l)
|
||||
pos.Begin, pos.End = nop, nop
|
||||
ectx.LabelPositions[l] = pos
|
||||
return
|
||||
}
|
||||
|
||||
// allocateInstr allocates an instruction.
|
||||
func (m *machine) allocateInstr() *instruction {
|
||||
instr := m.executableContext.InstructionPool.Allocate()
|
||||
if !m.regAllocStarted {
|
||||
instr.addedBeforeRegAlloc = true
|
||||
}
|
||||
return instr
|
||||
}
|
||||
|
||||
func resetInstruction(i *instruction) {
|
||||
*i = instruction{}
|
||||
}
|
||||
|
||||
func (m *machine) allocateNop() *instruction {
|
||||
instr := m.allocateInstr()
|
||||
instr.asNop0()
|
||||
return instr
|
||||
}
|
||||
|
||||
func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) {
|
||||
amode := &i.amode
|
||||
switch amode.kind {
|
||||
case addressModeKindResultStackSpace:
|
||||
amode.imm += ret0offset
|
||||
case addressModeKindArgStackSpace:
|
||||
amode.imm += arg0offset
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
|
||||
var sizeInBits byte
|
||||
switch i.kind {
|
||||
case store8, uLoad8:
|
||||
sizeInBits = 8
|
||||
case store16, uLoad16:
|
||||
sizeInBits = 16
|
||||
case store32, fpuStore32, uLoad32, fpuLoad32:
|
||||
sizeInBits = 32
|
||||
case store64, fpuStore64, uLoad64, fpuLoad64:
|
||||
sizeInBits = 64
|
||||
case fpuStore128, fpuLoad128:
|
||||
sizeInBits = 128
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
|
||||
if offsetFitsInAddressModeKindRegUnsignedImm12(sizeInBits, amode.imm) {
|
||||
amode.kind = addressModeKindRegUnsignedImm12
|
||||
} else {
|
||||
// In this case, we load the offset into the temporary register,
|
||||
// and then use it as the index register.
|
||||
newPrev := m.lowerConstantI64AndInsert(i.prev, tmpRegVReg, amode.imm)
|
||||
linkInstr(newPrev, i)
|
||||
*amode = addressMode{kind: addressModeKindRegReg, rn: amode.rn, rm: tmpRegVReg, extOp: extendOpUXTX /* indicates rm reg is 64-bit */}
|
||||
}
|
||||
}
|
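// Added annotation (not part of the upstream file): e.g. an addressModeKindArgStackSpace
// access with imm 40 for a 64-bit load, resolved with arg0offset 0, becomes
// addressModeKindRegUnsignedImm12 since 40 is a positive multiple of 8 below 8*4096;
// offsets that do not fit fall back to loading the offset into tmpRegVReg and using the
// register-register form.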
||||
|
||||
// resolveRelativeAddresses resolves the relative addresses before encoding.
|
||||
func (m *machine) resolveRelativeAddresses(ctx context.Context) {
|
||||
ectx := m.executableContext
|
||||
for {
|
||||
if len(m.unresolvedAddressModes) > 0 {
|
||||
arg0offset, ret0offset := m.arg0OffsetFromSP(), m.ret0OffsetFromSP()
|
||||
for _, i := range m.unresolvedAddressModes {
|
||||
m.resolveAddressingMode(arg0offset, ret0offset, i)
|
||||
}
|
||||
}
|
||||
|
||||
// Reuse the slice to gather the unresolved conditional branches.
|
||||
m.condBrRelocs = m.condBrRelocs[:0]
|
||||
|
||||
var fn string
|
||||
var fnIndex int
|
||||
var labelToSSABlockID map[label]ssa.BasicBlockID
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
fn = wazevoapi.GetCurrentFunctionName(ctx)
|
||||
labelToSSABlockID = make(map[label]ssa.BasicBlockID)
|
||||
for i, l := range ectx.SsaBlockIDToLabels {
|
||||
labelToSSABlockID[l] = ssa.BasicBlockID(i)
|
||||
}
|
||||
fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx)
|
||||
}
|
||||
|
||||
// Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label.
|
||||
var offset int64
|
||||
for i, pos := range ectx.OrderedBlockLabels {
|
||||
pos.BinaryOffset = offset
|
||||
var size int64
|
||||
for cur := pos.Begin; ; cur = cur.next {
|
||||
switch cur.kind {
|
||||
case nop0:
|
||||
l := cur.nop0Label()
|
||||
if pos, ok := ectx.LabelPositions[l]; ok {
|
||||
pos.BinaryOffset = offset + size
|
||||
}
|
||||
case condBr:
|
||||
if !cur.condBrOffsetResolved() {
|
||||
var nextLabel label
|
||||
if i < len(ectx.OrderedBlockLabels)-1 {
|
||||
// Note: this is only used when the block ends with fallthrough,
|
||||
// therefore it can be safely assumed that the next block exists when it's needed.
|
||||
nextLabel = ectx.OrderedBlockLabels[i+1].L
|
||||
}
|
||||
m.condBrRelocs = append(m.condBrRelocs, condBrReloc{
|
||||
cbr: cur, currentLabelPos: pos, offset: offset + size,
|
||||
nextLabel: nextLabel,
|
||||
})
|
||||
}
|
||||
}
|
||||
size += cur.size()
|
||||
if cur == pos.End {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
if size > 0 {
|
||||
l := pos.L
|
||||
var labelStr string
|
||||
if blkID, ok := labelToSSABlockID[l]; ok {
|
||||
labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID)
|
||||
} else {
|
||||
labelStr = l.String()
|
||||
}
|
||||
wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr))
|
||||
}
|
||||
}
|
||||
offset += size
|
||||
}
|
||||
|
||||
// Before resolving any offsets, we need to check if all the conditional branches can be resolved.
|
||||
var needRerun bool
|
||||
for i := range m.condBrRelocs {
|
||||
reloc := &m.condBrRelocs[i]
|
||||
cbr := reloc.cbr
|
||||
offset := reloc.offset
|
||||
|
||||
target := cbr.condBrLabel()
|
||||
offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
|
||||
diff := offsetOfTarget - offset
|
||||
if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
|
||||
// In this case, the conditional branch target is too far to encode. We place the trampoline instructions at the end of the current block,
|
||||
// and jump to it.
|
||||
m.insertConditionalJumpTrampoline(cbr, reloc.currentLabelPos, reloc.nextLabel)
|
||||
// Then, we need to recall this function to fix up the label offsets
|
||||
// as they have changed after the trampoline is inserted.
|
||||
needRerun = true
|
||||
}
|
||||
}
|
||||
if needRerun {
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
wazevoapi.PerfMap.Clear()
|
||||
}
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
var currentOffset int64
|
||||
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
|
||||
switch cur.kind {
|
||||
case br:
|
||||
target := cur.brLabel()
|
||||
offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
|
||||
diff := offsetOfTarget - currentOffset
|
||||
divided := diff >> 2
|
||||
if divided < minSignedInt26 || divided > maxSignedInt26 {
|
||||
// This means the currently compiled single function is extremely large.
|
||||
panic("too large function that requires branch relocation of large unconditional branch larger than 26-bit range")
|
||||
}
|
||||
cur.brOffsetResolve(diff)
|
||||
case condBr:
|
||||
if !cur.condBrOffsetResolved() {
|
||||
target := cur.condBrLabel()
|
||||
offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
|
||||
diff := offsetOfTarget - currentOffset
|
||||
if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
|
||||
panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly")
|
||||
}
|
||||
cur.condBrOffsetResolve(diff)
|
||||
}
|
||||
case brTableSequence:
|
||||
tableIndex := cur.u1
|
||||
targets := m.jmpTableTargets[tableIndex]
|
||||
for i := range targets {
|
||||
l := label(targets[i])
|
||||
offsetOfTarget := ectx.LabelPositions[l].BinaryOffset
|
||||
diff := offsetOfTarget - (currentOffset + brTableSequenceOffsetTableBegin)
|
||||
targets[i] = uint32(diff)
|
||||
}
|
||||
cur.brTableSequenceOffsetsResolved()
|
||||
case emitSourceOffsetInfo:
|
||||
m.compiler.AddSourceOffsetInfo(currentOffset, cur.sourceOffsetInfo())
|
||||
}
|
||||
currentOffset += cur.size()
|
||||
}
|
||||
}
|
||||
|
||||
const (
	maxSignedInt26 = 1<<25 - 1
	minSignedInt26 = -(1 << 25)

	maxSignedInt19 = 1<<18 - 1
	minSignedInt19 = -(1 << 18)
)
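// The constants above bound the branch offsets (in words, i.e. the byte offset divided by 4) that fit in
// the imm19/imm26 fields of conditional/unconditional branches. fitsInSignedWordOffset below is an
// illustrative helper added for exposition only (it is not referenced by the code above), showing the
// range check that resolveRelativeAddresses performs with these constants.
func fitsInSignedWordOffset(diff int64, bits uint) bool {
	divided := diff >> 2 // Branch immediates encode the byte offset divided by 4.
	hi := int64(1)<<(bits-1) - 1
	lo := -(int64(1) << (bits - 1))
	return lo <= divided && divided <= hi
}

// e.g. fitsInSignedWordOffset(diff, 19) is equivalent to the minSignedInt19/maxSignedInt19 check above.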
|
||||
|
||||
func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) {
|
||||
cur := currentBlk.End
|
||||
originalTarget := cbr.condBrLabel()
|
||||
endNext := cur.next
|
||||
|
||||
if cur.kind != br {
|
||||
// The block does not end with an unconditional branch, so execution could fall through into the
// trampoline; insert a "skip" branch to the next block to jump over the trampoline instructions.
|
||||
skip := m.allocateInstr()
|
||||
skip.asBr(nextLabel)
|
||||
cur = linkInstr(cur, skip)
|
||||
}
|
||||
|
||||
cbrNewTargetInstr, cbrNewTargetLabel := m.allocateBrTarget()
|
||||
cbr.setCondBrTargets(cbrNewTargetLabel)
|
||||
cur = linkInstr(cur, cbrNewTargetInstr)
|
||||
|
||||
// Then insert the unconditional branch to the original target, which should always be encodable
// since a 26-bit offset is enough for any practical application.
|
||||
br := m.allocateInstr()
|
||||
br.asBr(originalTarget)
|
||||
cur = linkInstr(cur, br)
|
||||
|
||||
// Update the end of the current block.
|
||||
currentBlk.End = cur
|
||||
|
||||
linkInstr(cur, endNext)
|
||||
}
|
||||
|
||||
// Format implements backend.Machine.
|
||||
func (m *machine) Format() string {
|
||||
ectx := m.executableContext
|
||||
begins := map[*instruction]label{}
|
||||
for l, pos := range ectx.LabelPositions {
|
||||
begins[pos.Begin] = l
|
||||
}
|
||||
|
||||
irBlocks := map[label]ssa.BasicBlockID{}
|
||||
for i, l := range ectx.SsaBlockIDToLabels {
|
||||
irBlocks[l] = ssa.BasicBlockID(i)
|
||||
}
|
||||
|
||||
var lines []string
|
||||
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
|
||||
if l, ok := begins[cur]; ok {
|
||||
var labelStr string
|
||||
if blkID, ok := irBlocks[l]; ok {
|
||||
labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID)
|
||||
} else {
|
||||
labelStr = fmt.Sprintf("%s:", l)
|
||||
}
|
||||
lines = append(lines, labelStr)
|
||||
}
|
||||
if cur.kind == nop0 {
|
||||
continue
|
||||
}
|
||||
lines = append(lines, "\t"+cur.String())
|
||||
}
|
||||
return "\n" + strings.Join(lines, "\n") + "\n"
|
||||
}
|
||||
|
||||
// InsertReturn implements backend.Machine.
|
||||
func (m *machine) InsertReturn() {
|
||||
i := m.allocateInstr()
|
||||
i.asRet()
|
||||
m.insert(i)
|
||||
}
|
||||
|
||||
func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 {
|
||||
offset, ok := m.spillSlots[id]
|
||||
if !ok {
|
||||
offset = m.spillSlotSize
|
||||
// TODO: this should be aligned depending on the `size` to use Imm12 offset load/store as much as possible.
|
||||
m.spillSlots[id] = offset
|
||||
m.spillSlotSize += int64(size)
|
||||
}
|
||||
return offset + 16 // spill slot starts above the clobbered registers and the frame size.
|
||||
}
|
||||
|
||||
func (m *machine) clobberedRegSlotSize() int64 {
|
||||
return int64(len(m.clobberedRegs) * 16)
|
||||
}
|
||||
|
||||
func (m *machine) arg0OffsetFromSP() int64 {
|
||||
return m.frameSize() +
|
||||
16 + // 16-byte aligned return address
|
||||
16 // frame size saved below the clobbered registers.
|
||||
}
|
||||
|
||||
func (m *machine) ret0OffsetFromSP() int64 {
|
||||
return m.arg0OffsetFromSP() + m.currentABI.ArgStackSize
|
||||
}
|
||||
|
||||
func (m *machine) requiredStackSize() int64 {
|
||||
return m.maxRequiredStackSizeForCalls +
|
||||
m.frameSize() +
|
||||
16 + // 16-byte aligned return address.
|
||||
16 // frame size saved below the clobbered registers.
|
||||
}
|
||||
|
||||
func (m *machine) frameSize() int64 {
	s := m.clobberedRegSlotSize() + m.spillSlotSize
	if s&0xf != 0 {
		panic(fmt.Errorf("BUG: frame size %d is not 16-byte aligned", s))
	}
	return s
}
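// Worked example of the offsets above (illustrative values only): with two clobbered registers
// (clobberedRegSlotSize() = 2*16 = 32) and spillSlotSize = 16, frameSize() = 48, so
// arg0OffsetFromSP() = 48 + 16 + 16 = 80, and with ArgStackSize = 8, ret0OffsetFromSP() = 88.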
|
||||
|
||||
func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) {
|
||||
// TODO: reuse the slice!
|
||||
labels := make([]uint32, len(targets))
|
||||
for j, target := range targets {
|
||||
labels[j] = uint32(m.executableContext.GetOrAllocateSSABlockLabel(target))
|
||||
}
|
||||
index = len(m.jmpTableTargets)
|
||||
m.jmpTableTargets = append(m.jmpTableTargets, labels)
|
||||
return
|
||||
}
|
469
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
generated
vendored
Normal file
@ -0,0 +1,469 @@
|
||||
package arm64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
// PostRegAlloc implements backend.Machine.
|
||||
func (m *machine) PostRegAlloc() {
|
||||
m.setupPrologue()
|
||||
m.postRegAlloc()
|
||||
}
|
||||
|
||||
// setupPrologue initializes the prologue of the function.
|
||||
func (m *machine) setupPrologue() {
|
||||
ectx := m.executableContext
|
||||
|
||||
cur := ectx.RootInstr
|
||||
prevInitInst := cur.next
|
||||
|
||||
//
|
||||
// (high address) (high address)
|
||||
// SP----> +-----------------+ +------------------+ <----+
|
||||
// | ....... | | ....... | |
|
||||
// | ret Y | | ret Y | |
|
||||
// | ....... | | ....... | |
|
||||
// | ret 0 | | ret 0 | |
|
||||
// | arg X | | arg X | | size_of_arg_ret.
|
||||
// | ....... | ====> | ....... | |
|
||||
// | arg 1 | | arg 1 | |
|
||||
// | arg 0 | | arg 0 | <----+
|
||||
// |-----------------| | size_of_arg_ret |
|
||||
// | return address |
|
||||
// +------------------+ <---- SP
|
||||
// (low address) (low address)
|
||||
|
||||
// Saves the return address (lr) and the size_of_arg_ret below the SP.
|
||||
// size_of_arg_ret is used for stack unwinding.
|
||||
cur = m.createReturnAddrAndSizeOfArgRetSlot(cur)
|
||||
|
||||
if !m.stackBoundsCheckDisabled {
|
||||
cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur)
|
||||
}
|
||||
|
||||
// Decrement SP if spillSlotSize > 0.
|
||||
if m.spillSlotSize == 0 && len(m.spillSlots) != 0 {
|
||||
panic(fmt.Sprintf("BUG: spillSlotSize=%d, spillSlots=%v\n", m.spillSlotSize, m.spillSlots))
|
||||
}
|
||||
|
||||
if regs := m.clobberedRegs; len(regs) > 0 {
|
||||
//
|
||||
// (high address) (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | size_of_arg_ret | | size_of_arg_ret |
|
||||
// | ReturnAddress | | ReturnAddress |
|
||||
// SP----> +-----------------+ ====> +-----------------+
|
||||
// (low address) | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 0 |
|
||||
// +-----------------+ <----- SP
|
||||
// (low address)
|
||||
//
|
||||
_amode := addressModePreOrPostIndex(spVReg,
|
||||
-16, // stack pointer must be 16-byte aligned.
|
||||
true, // Decrement before store.
|
||||
)
|
||||
for _, vr := range regs {
|
||||
// TODO: pair stores to reduce the number of instructions.
|
||||
store := m.allocateInstr()
|
||||
store.asStore(operandNR(vr), _amode, regTypeToRegisterSizeInBits(vr.RegType()))
|
||||
cur = linkInstr(cur, store)
|
||||
}
|
||||
}
|
||||
|
||||
if size := m.spillSlotSize; size > 0 {
|
||||
// Check if size is 16-byte aligned.
|
||||
if size&0xf != 0 {
|
||||
panic(fmt.Errorf("BUG: spill slot size %d is not 16-byte aligned", size))
|
||||
}
|
||||
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, size, false)
|
||||
|
||||
// At this point, the stack looks like:
|
||||
//
|
||||
// (high address)
|
||||
// +------------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | size_of_arg_ret |
|
||||
// | ReturnAddress |
|
||||
// +------------------+
|
||||
// | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 0 |
|
||||
// | spill slot N |
|
||||
// | ............ |
|
||||
// | spill slot 2 |
|
||||
// | spill slot 0 |
|
||||
// SP----> +------------------+
|
||||
// (low address)
|
||||
}
|
||||
|
||||
// We push the frame size into the stack to make it possible to unwind stack:
|
||||
//
|
||||
//
|
||||
// (high address) (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | size_of_arg_ret | | size_of_arg_ret |
|
||||
// | ReturnAddress | | ReturnAddress |
|
||||
// +-----------------+ ==> +-----------------+ <----+
|
||||
// | clobbered M | | clobbered M | |
|
||||
// | ............ | | ............ | |
|
||||
// | clobbered 2 | | clobbered 2 | |
|
||||
// | clobbered 1 | | clobbered 1 | | frame size
|
||||
// | clobbered 0 | | clobbered 0 | |
|
||||
// | spill slot N | | spill slot N | |
|
||||
// | ............ | | ............ | |
|
||||
// | spill slot 0 | | spill slot 0 | <----+
|
||||
// SP---> +-----------------+ | xxxxxx | ;; unused space to make it 16-byte aligned.
|
||||
// | frame_size |
|
||||
// +-----------------+ <---- SP
|
||||
// (low address)
|
||||
//
|
||||
cur = m.createFrameSizeSlot(cur, m.frameSize())
|
||||
|
||||
linkInstr(cur, prevInitInst)
|
||||
}
|
||||
|
||||
func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruction {
|
||||
// First we decrement the stack pointer to point to the arg0 slot.
|
||||
var sizeOfArgRetReg regalloc.VReg
|
||||
s := int64(m.currentABI.AlignedArgResultStackSlotSize())
|
||||
if s > 0 {
|
||||
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s)
|
||||
sizeOfArgRetReg = tmpRegVReg
|
||||
|
||||
subSp := m.allocateInstr()
|
||||
subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true)
|
||||
cur = linkInstr(cur, subSp)
|
||||
} else {
|
||||
sizeOfArgRetReg = xzrVReg
|
||||
}
|
||||
|
||||
// Saves the return address (lr) and the size_of_arg_ret below the SP.
|
||||
// size_of_arg_ret is used for stack unwinding.
|
||||
pstr := m.allocateInstr()
|
||||
amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */)
|
||||
pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode)
|
||||
cur = linkInstr(cur, pstr)
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction {
|
||||
var frameSizeReg regalloc.VReg
|
||||
if s > 0 {
|
||||
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s)
|
||||
frameSizeReg = tmpRegVReg
|
||||
} else {
|
||||
frameSizeReg = xzrVReg
|
||||
}
|
||||
_amode := addressModePreOrPostIndex(spVReg,
|
||||
-16, // stack pointer must be 16-byte aligned.
|
||||
true, // Decrement before store.
|
||||
)
|
||||
store := m.allocateInstr()
|
||||
store.asStore(operandNR(frameSizeReg), _amode, 64)
|
||||
cur = linkInstr(cur, store)
|
||||
return cur
|
||||
}
|
||||
|
||||
// postRegAlloc does multiple things while walking through the instructions:
|
||||
// 1. Removes the redundant copy instruction.
|
||||
// 2. Inserts the epilogue.
|
||||
func (m *machine) postRegAlloc() {
|
||||
ectx := m.executableContext
|
||||
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
|
||||
switch cur.kind {
|
||||
case ret:
|
||||
m.setupEpilogueAfter(cur.prev)
|
||||
case loadConstBlockArg:
|
||||
lc := cur
|
||||
next := lc.next
|
||||
m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0]
|
||||
m.lowerLoadConstantBlockArgAfterRegAlloc(lc)
|
||||
for _, instr := range m.executableContext.PendingInstructions {
|
||||
cur = linkInstr(cur, instr)
|
||||
}
|
||||
linkInstr(cur, next)
|
||||
m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0]
|
||||
default:
|
||||
// Removes the redundant copy instruction.
|
||||
if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() {
|
||||
prev, next := cur.prev, cur.next
|
||||
// Remove the copy instruction.
|
||||
prev.next = next
|
||||
if next != nil {
|
||||
next.prev = prev
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) setupEpilogueAfter(cur *instruction) {
|
||||
prevNext := cur.next
|
||||
|
||||
// We've stored the frame size in the prologue, and now that we are about to return from this function, we won't need it anymore.
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, 16, true)
|
||||
|
||||
if s := m.spillSlotSize; s > 0 {
|
||||
// Adjust SP to the original value:
|
||||
//
|
||||
// (high address) (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | xxxxx | | xxxxx |
|
||||
// | ReturnAddress | | ReturnAddress |
|
||||
// +-----------------+ ====> +-----------------+
|
||||
// | clobbered M | | clobbered M |
|
||||
// | ............ | | ............ |
|
||||
// | clobbered 1 | | clobbered 1 |
|
||||
// | clobbered 0 | | clobbered 0 |
|
||||
// | spill slot N | +-----------------+ <---- SP
|
||||
// | ............ |
|
||||
// | spill slot 0 |
|
||||
// SP---> +-----------------+
|
||||
// (low address)
|
||||
//
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
|
||||
}
|
||||
|
||||
// First we need to restore the clobbered registers.
|
||||
if len(m.clobberedRegs) > 0 {
|
||||
// (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | xxxxx | | xxxxx |
|
||||
// | ReturnAddress | | ReturnAddress |
|
||||
// +-----------------+ ========> +-----------------+ <---- SP
|
||||
// | clobbered M |
|
||||
// | ........... |
|
||||
// | clobbered 1 |
|
||||
// | clobbered 0 |
|
||||
// SP---> +-----------------+
|
||||
// (low address)
|
||||
|
||||
l := len(m.clobberedRegs) - 1
|
||||
for i := range m.clobberedRegs {
|
||||
vr := m.clobberedRegs[l-i] // reverse order to restore.
|
||||
load := m.allocateInstr()
|
||||
amode := addressModePreOrPostIndex(spVReg,
|
||||
16, // stack pointer must be 16-byte aligned.
|
||||
false, // Increment after load.
|
||||
)
|
||||
// TODO: pair loads to reduce the number of instructions.
|
||||
switch regTypeToRegisterSizeInBits(vr.RegType()) {
|
||||
case 64: // restore int reg.
|
||||
load.asULoad(operandNR(vr), amode, 64)
|
||||
case 128: // restore vector reg.
|
||||
load.asFpuLoad(operandNR(vr), amode, 128)
|
||||
}
|
||||
cur = linkInstr(cur, load)
|
||||
}
|
||||
}
|
||||
|
||||
// Reload the return address (lr).
|
||||
//
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | ===> | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | xxxxx | +-----------------+ <---- SP
|
||||
// | ReturnAddress |
|
||||
// SP----> +-----------------+
|
||||
|
||||
ldr := m.allocateInstr()
|
||||
ldr.asULoad(operandNR(lrVReg),
|
||||
addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
|
||||
cur = linkInstr(cur, ldr)
|
||||
|
||||
if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 {
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
|
||||
}
|
||||
|
||||
linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// saveRequiredRegs is the set of registers that must be saved/restored while growing the stack when there's insufficient
// stack space left. Basically this is the combination of CalleeSavedRegisters plus the argument registers except for x0,
// which always points to the execution context whenever the native code is entered from Go.
|
||||
var saveRequiredRegs = []regalloc.VReg{
|
||||
x1VReg, x2VReg, x3VReg, x4VReg, x5VReg, x6VReg, x7VReg,
|
||||
x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg, lrVReg,
|
||||
v0VReg, v1VReg, v2VReg, v3VReg, v4VReg, v5VReg, v6VReg, v7VReg,
|
||||
v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg,
|
||||
}
|
||||
|
||||
// insertStackBoundsCheck will insert the instructions after `cur` to check the
|
||||
// stack bounds, and if there's no sufficient spaces required for the function,
|
||||
// exit the execution and try growing it in Go world.
|
||||
//
|
||||
// TODO: we should be able to share the instructions across all the functions to reduce the size of compiled executable.
|
||||
func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction {
|
||||
if requiredStackSize%16 != 0 {
|
||||
panic("BUG")
|
||||
}
|
||||
|
||||
if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok {
|
||||
// sub tmp, sp, #requiredStackSize
|
||||
sub := m.allocateInstr()
|
||||
sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), immm12op, true)
|
||||
cur = linkInstr(cur, sub)
|
||||
} else {
|
||||
// In this case, we first load the requiredStackSize into the temporary register,
|
||||
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
|
||||
// Then subtract it.
|
||||
sub := m.allocateInstr()
|
||||
sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true)
|
||||
cur = linkInstr(cur, sub)
|
||||
}
|
||||
|
||||
tmp2 := x11VReg // Caller save, so it is safe to use it here in the prologue.
|
||||
|
||||
// ldr tmp2, [executionContext #StackBottomPtr]
|
||||
ldr := m.allocateInstr()
|
||||
ldr.asULoad(operandNR(tmp2), addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
rn: x0VReg, // execution context is always the first argument.
|
||||
imm: wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(),
|
||||
}, 64)
|
||||
cur = linkInstr(cur, ldr)
|
||||
|
||||
// subs xzr, tmp, tmp2
|
||||
subs := m.allocateInstr()
|
||||
subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true)
|
||||
cur = linkInstr(cur, subs)
|
||||
|
||||
// b.ge #imm
|
||||
cbr := m.allocateInstr()
|
||||
cbr.asCondBr(ge.asCond(), labelInvalid, false /* ignored */)
|
||||
cur = linkInstr(cur, cbr)
|
||||
|
||||
// Set the required stack size and set it to the exec context.
|
||||
{
|
||||
// First load the requiredStackSize into the temporary register,
|
||||
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
|
||||
setRequiredStackSize := m.allocateInstr()
|
||||
setRequiredStackSize.asStore(operandNR(tmpRegVReg),
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
// Execution context is always the first argument.
|
||||
rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
|
||||
}, 64)
|
||||
|
||||
cur = linkInstr(cur, setRequiredStackSize)
|
||||
}
|
||||
|
||||
ldrAddress := m.allocateInstr()
|
||||
ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
rn: x0VReg, // execution context is always the first argument
|
||||
imm: wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(),
|
||||
}, 64)
|
||||
cur = linkInstr(cur, ldrAddress)
|
||||
|
||||
// Then jump to the stack grow call sequence's address, transferring control
// to the code compiled by CompileStackGrowCallSequence.
|
||||
bl := m.allocateInstr()
|
||||
bl.asCallIndirect(tmpRegVReg, nil)
|
||||
cur = linkInstr(cur, bl)
|
||||
|
||||
// Now that we know the entire code, we can finalize how many bytes
|
||||
// we have to skip when the stack size is sufficient.
|
||||
var cbrOffset int64
|
||||
for _cur := cbr; ; _cur = _cur.next {
|
||||
cbrOffset += _cur.size()
|
||||
if _cur == cur {
|
||||
break
|
||||
}
|
||||
}
|
||||
cbr.condBrOffsetResolve(cbrOffset)
|
||||
return cur
|
||||
}
|
||||
|
||||
// CompileStackGrowCallSequence implements backend.Machine.
|
||||
func (m *machine) CompileStackGrowCallSequence() []byte {
|
||||
ectx := m.executableContext
|
||||
|
||||
cur := m.allocateInstr()
|
||||
cur.asNop0()
|
||||
ectx.RootInstr = cur
|
||||
|
||||
// Save the callee saved and argument registers.
|
||||
cur = m.saveRegistersInExecutionContext(cur, saveRequiredRegs)
|
||||
|
||||
// Save the current stack pointer.
|
||||
cur = m.saveCurrentStackPointer(cur, x0VReg)
|
||||
|
||||
// Set the exit status on the execution context.
|
||||
cur = m.setExitCode(cur, x0VReg, wazevoapi.ExitCodeGrowStack)
|
||||
|
||||
// Exit the execution.
|
||||
cur = m.storeReturnAddressAndExit(cur)
|
||||
|
||||
// After the exit, restore the saved registers.
|
||||
cur = m.restoreRegistersInExecutionContext(cur, saveRequiredRegs)
|
||||
|
||||
// Then go back to the original address of this stack grow call.
|
||||
ret := m.allocateInstr()
|
||||
ret.asRet()
|
||||
linkInstr(cur, ret)
|
||||
|
||||
m.encode(ectx.RootInstr)
|
||||
return m.compiler.Buf()
|
||||
}
|
||||
|
||||
func (m *machine) addsAddOrSubStackPointer(cur *instruction, rd regalloc.VReg, diff int64, add bool) *instruction {
|
||||
ectx := m.executableContext
|
||||
|
||||
ectx.PendingInstructions = ectx.PendingInstructions[:0]
|
||||
m.insertAddOrSubStackPointer(rd, diff, add)
|
||||
for _, inserted := range ectx.PendingInstructions {
|
||||
cur = linkInstr(cur, inserted)
|
||||
}
|
||||
return cur
|
||||
}
|
152
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
generated
vendored
Normal file
@ -0,0 +1,152 @@
|
||||
package arm64
|
||||
|
||||
// This file implements the interfaces required for register allocations. See backend.RegAllocFunctionMachine.
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
// ClobberedRegisters implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) ClobberedRegisters(regs []regalloc.VReg) {
|
||||
m.clobberedRegs = append(m.clobberedRegs[:0], regs...)
|
||||
}
|
||||
|
||||
// Swap implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {
|
||||
prevNext := cur.next
|
||||
var mov1, mov2, mov3 *instruction
|
||||
if x1.RegType() == regalloc.RegTypeInt {
|
||||
if !tmp.Valid() {
|
||||
tmp = tmpRegVReg
|
||||
}
|
||||
mov1 = m.allocateInstr().asMove64(tmp, x1)
|
||||
mov2 = m.allocateInstr().asMove64(x1, x2)
|
||||
mov3 = m.allocateInstr().asMove64(x2, tmp)
|
||||
cur = linkInstr(cur, mov1)
|
||||
cur = linkInstr(cur, mov2)
|
||||
cur = linkInstr(cur, mov3)
|
||||
linkInstr(cur, prevNext)
|
||||
} else {
|
||||
if !tmp.Valid() {
|
||||
r2 := x2.RealReg()
|
||||
// Temporarily spill x1 to stack.
|
||||
cur = m.InsertStoreRegisterAt(x1, cur, true).prev
|
||||
// Then move x2 to x1.
|
||||
cur = linkInstr(cur, m.allocateInstr().asFpuMov128(x1, x2))
|
||||
linkInstr(cur, prevNext)
|
||||
// Then reload the original value on x1 from stack to r2.
|
||||
m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true)
|
||||
} else {
|
||||
mov1 = m.allocateInstr().asFpuMov128(tmp, x1)
|
||||
mov2 = m.allocateInstr().asFpuMov128(x1, x2)
|
||||
mov3 = m.allocateInstr().asFpuMov128(x2, tmp)
|
||||
cur = linkInstr(cur, mov1)
|
||||
cur = linkInstr(cur, mov2)
|
||||
cur = linkInstr(cur, mov3)
|
||||
linkInstr(cur, prevNext)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// InsertMoveBefore implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) {
|
||||
typ := src.RegType()
|
||||
if typ != dst.RegType() {
|
||||
panic("BUG: src and dst must have the same type")
|
||||
}
|
||||
|
||||
mov := m.allocateInstr()
|
||||
if typ == regalloc.RegTypeInt {
|
||||
mov.asMove64(dst, src)
|
||||
} else {
|
||||
mov.asFpuMov128(dst, src)
|
||||
}
|
||||
|
||||
cur := instr.prev
|
||||
prevNext := cur.next
|
||||
cur = linkInstr(cur, mov)
|
||||
linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// SSABlockLabel implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label {
|
||||
return m.executableContext.SsaBlockIDToLabels[id]
|
||||
}
|
||||
|
||||
// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
|
||||
if !v.IsRealReg() {
|
||||
panic("BUG: VReg must be backed by real reg to be stored")
|
||||
}
|
||||
|
||||
typ := m.compiler.TypeOf(v)
|
||||
|
||||
var prevNext, cur *instruction
|
||||
if after {
|
||||
cur, prevNext = instr, instr.next
|
||||
} else {
|
||||
cur, prevNext = instr.prev, instr
|
||||
}
|
||||
|
||||
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
|
||||
var amode addressMode
|
||||
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
|
||||
store := m.allocateInstr()
|
||||
store.asStore(operandNR(v), amode, typ.Bits())
|
||||
|
||||
cur = linkInstr(cur, store)
|
||||
return linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
|
||||
if !v.IsRealReg() {
|
||||
panic("BUG: VReg must be backed by real reg to be stored")
|
||||
}
|
||||
|
||||
typ := m.compiler.TypeOf(v)
|
||||
|
||||
var prevNext, cur *instruction
|
||||
if after {
|
||||
cur, prevNext = instr, instr.next
|
||||
} else {
|
||||
cur, prevNext = instr.prev, instr
|
||||
}
|
||||
|
||||
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
|
||||
var amode addressMode
|
||||
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
|
||||
load := m.allocateInstr()
|
||||
switch typ {
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
load.asULoad(operandNR(v), amode, typ.Bits())
|
||||
case ssa.TypeF32, ssa.TypeF64:
|
||||
load.asFpuLoad(operandNR(v), amode, typ.Bits())
|
||||
case ssa.TypeV128:
|
||||
load.asFpuLoad(operandNR(v), amode, 128)
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
|
||||
cur = linkInstr(cur, load)
|
||||
return linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// LastInstrForInsertion implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction {
|
||||
cur := end
|
||||
for cur.kind == nop0 {
|
||||
cur = cur.prev
|
||||
if cur == begin {
|
||||
return end
|
||||
}
|
||||
}
|
||||
switch cur.kind {
|
||||
case br:
|
||||
return cur
|
||||
default:
|
||||
return end
|
||||
}
|
||||
}
|
117
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go
generated
vendored
Normal file
@ -0,0 +1,117 @@
|
||||
package arm64
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
)
|
||||
|
||||
const (
	// trampolineCallSize is the size of the trampoline instruction sequence for each function in an island.
	trampolineCallSize = 4*4 + 4 // Four instructions + 32-bit immediate.

	// Unconditional branch offset is encoded as divided by 4 in imm26.
	// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en

	maxUnconditionalBranchOffset = maxSignedInt26 * 4
	minUnconditionalBranchOffset = minSignedInt26 * 4

	// trampolineIslandInterval is the range of the trampoline island.
	// Half of the range is used for the trampoline island, and the other half is used for the function.
	trampolineIslandInterval = maxUnconditionalBranchOffset / 2

	// maxNumFunctions explicitly specifies the maximum number of functions that can be allowed in a single executable.
	maxNumFunctions = trampolineIslandInterval >> 6

	// maxFunctionExecutableSize is the maximum size of a function that can exist in a trampoline island.
	// Conservatively set to 1/4 of the trampoline island interval.
	maxFunctionExecutableSize = trampolineIslandInterval >> 2
)
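// Worked example of the constants above (illustrative only): maxSignedInt26*4 is roughly ±128MiB of
// unconditional-branch reach, so trampolineIslandInterval is roughly 64MiB, maxNumFunctions is about
// one million (interval >> 6), and an island serving N functions occupies trampolineCallSize*N = 20*N
// bytes, which is exactly what CallTrampolineIslandInfo below reports.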
|
||||
|
||||
// CallTrampolineIslandInfo implements backend.Machine CallTrampolineIslandInfo.
|
||||
func (m *machine) CallTrampolineIslandInfo(numFunctions int) (interval, size int, err error) {
|
||||
if numFunctions > maxNumFunctions {
|
||||
return 0, 0, fmt.Errorf("too many functions: %d > %d", numFunctions, maxNumFunctions)
|
||||
}
|
||||
return trampolineIslandInterval, trampolineCallSize * numFunctions, nil
|
||||
}
|
||||
|
||||
// ResolveRelocations implements backend.Machine ResolveRelocations.
|
||||
func (m *machine) ResolveRelocations(
|
||||
refToBinaryOffset []int,
|
||||
executable []byte,
|
||||
relocations []backend.RelocationInfo,
|
||||
callTrampolineIslandOffsets []int,
|
||||
) {
|
||||
for _, islandOffset := range callTrampolineIslandOffsets {
|
||||
encodeCallTrampolineIsland(refToBinaryOffset, islandOffset, executable)
|
||||
}
|
||||
|
||||
for _, r := range relocations {
|
||||
instrOffset := r.Offset
|
||||
calleeFnOffset := refToBinaryOffset[r.FuncRef]
|
||||
diff := int64(calleeFnOffset) - (instrOffset)
|
||||
// Check if the diff is within the range of the branch instruction.
|
||||
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
|
||||
// Find the near trampoline island from callTrampolineIslandOffsets.
|
||||
islandOffset := searchTrampolineIsland(callTrampolineIslandOffsets, int(instrOffset))
|
||||
islandTargetOffset := islandOffset + trampolineCallSize*int(r.FuncRef)
|
||||
diff = int64(islandTargetOffset) - (instrOffset)
|
||||
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
|
||||
panic("BUG in trampoline placement")
|
||||
}
|
||||
}
|
||||
binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(true, diff))
|
||||
}
|
||||
}
|
||||
|
||||
// encodeCallTrampolineIsland encodes a trampoline island for the given functions.
|
||||
// Each island consists of a trampoline instruction sequence for each function.
|
||||
// Each trampoline instruction sequence consists of 4 instructions + 32-bit immediate.
|
||||
func encodeCallTrampolineIsland(refToBinaryOffset []int, islandOffset int, executable []byte) {
|
||||
for i := 0; i < len(refToBinaryOffset); i++ {
|
||||
trampolineOffset := islandOffset + trampolineCallSize*i
|
||||
|
||||
fnOffset := refToBinaryOffset[i]
|
||||
diff := fnOffset - (trampolineOffset + 16)
|
||||
if diff > math.MaxInt32 || diff < math.MinInt32 {
|
||||
// Even amd64 can't handle this case; 4GB is too big.
|
||||
panic("too big binary")
|
||||
}
|
||||
|
||||
// tmpReg and tmpReg2 are safe to overwrite (in fact any caller-saved register is safe to use).
|
||||
tmpReg, tmpReg2 := regNumberInEncoding[tmpRegVReg.RealReg()], regNumberInEncoding[x11]
|
||||
|
||||
// adr tmpReg, PC+16: load the address of #diff into tmpReg.
|
||||
binary.LittleEndian.PutUint32(executable[trampolineOffset:], encodeAdr(tmpReg, 16))
|
||||
// ldrsw tmpReg2, [tmpReg]: Load #diff into tmpReg2.
|
||||
binary.LittleEndian.PutUint32(executable[trampolineOffset+4:],
|
||||
encodeLoadOrStore(sLoad32, tmpReg2, addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpRegVReg}))
|
||||
// add tmpReg, tmpReg2, tmpReg: add #diff to the address of #diff, getting the absolute address of the function.
|
||||
binary.LittleEndian.PutUint32(executable[trampolineOffset+8:],
|
||||
encodeAluRRR(aluOpAdd, tmpReg, tmpReg, tmpReg2, true, false))
|
||||
// br tmpReg: branch to the function without overwriting the link register.
|
||||
binary.LittleEndian.PutUint32(executable[trampolineOffset+12:], encodeUnconditionalBranchReg(tmpReg, false))
|
||||
// #diff
|
||||
binary.LittleEndian.PutUint32(executable[trampolineOffset+16:], uint32(diff))
|
||||
}
|
||||
}
|
||||
|
||||
// searchTrampolineIsland finds the nearest trampoline island from callTrampolineIslandOffsets.
|
||||
// Note that even if the offset is in the middle of two islands, it returns the latter one.
|
||||
// That is ok because the island is always placed in the middle of the range.
|
||||
//
|
||||
// precondition: callTrampolineIslandOffsets is sorted in ascending order.
|
||||
func searchTrampolineIsland(callTrampolineIslandOffsets []int, offset int) int {
	l := len(callTrampolineIslandOffsets)
	n := sort.Search(l, func(i int) bool {
		return callTrampolineIslandOffsets[i] >= offset
	})
	if n == l {
		n = l - 1
	}
	return callTrampolineIslandOffsets[n]
}
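// exampleSearchTrampolineIsland is an illustrative sketch with assumed island offsets (it is not
// referenced anywhere in this package); it demonstrates that the search returns the island at or
// after the given offset, clamping to the last island when the offset is past all of them.
func exampleSearchTrampolineIsland() (following, clamped int) {
	islands := []int{0x4000000, 0x8000000}                 // sorted trampoline island offsets (assumed values).
	following = searchTrampolineIsland(islands, 0x5000000) // between the two islands: returns 0x8000000.
	clamped = searchTrampolineIsland(islands, 0x9000000)   // past the last island: clamps to 0x8000000.
	return
}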
|
397
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/reg.go
generated
vendored
Normal file
@ -0,0 +1,397 @@
|
||||
package arm64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
)
|
||||
|
||||
// Arm64-specific registers.
|
||||
//
|
||||
// See https://developer.arm.com/documentation/dui0801/a/Overview-of-AArch64-state/Predeclared-core-register-names-in-AArch64-state
|
||||
|
||||
const (
|
||||
// General purpose registers. Note that we do not distinguish wn and xn registers
|
||||
// because they are the same from the perspective of register allocator, and
|
||||
// the size can be determined by the type of the instruction.
|
||||
|
||||
x0 = regalloc.RealRegInvalid + 1 + iota
|
||||
x1
|
||||
x2
|
||||
x3
|
||||
x4
|
||||
x5
|
||||
x6
|
||||
x7
|
||||
x8
|
||||
x9
|
||||
x10
|
||||
x11
|
||||
x12
|
||||
x13
|
||||
x14
|
||||
x15
|
||||
x16
|
||||
x17
|
||||
x18
|
||||
x19
|
||||
x20
|
||||
x21
|
||||
x22
|
||||
x23
|
||||
x24
|
||||
x25
|
||||
x26
|
||||
x27
|
||||
x28
|
||||
x29
|
||||
x30
|
||||
|
||||
// Vector registers. Note that we do not distinguish vn and dn, ... registers
|
||||
// because they are the same from the perspective of register allocator, and
|
||||
// the size can be determined by the type of the instruction.
|
||||
|
||||
v0
|
||||
v1
|
||||
v2
|
||||
v3
|
||||
v4
|
||||
v5
|
||||
v6
|
||||
v7
|
||||
v8
|
||||
v9
|
||||
v10
|
||||
v11
|
||||
v12
|
||||
v13
|
||||
v14
|
||||
v15
|
||||
v16
|
||||
v17
|
||||
v18
|
||||
v19
|
||||
v20
|
||||
v21
|
||||
v22
|
||||
v23
|
||||
v24
|
||||
v25
|
||||
v26
|
||||
v27
|
||||
v28
|
||||
v29
|
||||
v30
|
||||
v31
|
||||
|
||||
// Special registers
|
||||
|
||||
xzr
|
||||
sp
|
||||
lr = x30
|
||||
fp = x29
|
||||
tmp = x27
|
||||
)
|
||||
|
||||
var (
|
||||
x0VReg = regalloc.FromRealReg(x0, regalloc.RegTypeInt)
|
||||
x1VReg = regalloc.FromRealReg(x1, regalloc.RegTypeInt)
|
||||
x2VReg = regalloc.FromRealReg(x2, regalloc.RegTypeInt)
|
||||
x3VReg = regalloc.FromRealReg(x3, regalloc.RegTypeInt)
|
||||
x4VReg = regalloc.FromRealReg(x4, regalloc.RegTypeInt)
|
||||
x5VReg = regalloc.FromRealReg(x5, regalloc.RegTypeInt)
|
||||
x6VReg = regalloc.FromRealReg(x6, regalloc.RegTypeInt)
|
||||
x7VReg = regalloc.FromRealReg(x7, regalloc.RegTypeInt)
|
||||
x8VReg = regalloc.FromRealReg(x8, regalloc.RegTypeInt)
|
||||
x9VReg = regalloc.FromRealReg(x9, regalloc.RegTypeInt)
|
||||
x10VReg = regalloc.FromRealReg(x10, regalloc.RegTypeInt)
|
||||
x11VReg = regalloc.FromRealReg(x11, regalloc.RegTypeInt)
|
||||
x12VReg = regalloc.FromRealReg(x12, regalloc.RegTypeInt)
|
||||
x13VReg = regalloc.FromRealReg(x13, regalloc.RegTypeInt)
|
||||
x14VReg = regalloc.FromRealReg(x14, regalloc.RegTypeInt)
|
||||
x15VReg = regalloc.FromRealReg(x15, regalloc.RegTypeInt)
|
||||
x16VReg = regalloc.FromRealReg(x16, regalloc.RegTypeInt)
|
||||
x17VReg = regalloc.FromRealReg(x17, regalloc.RegTypeInt)
|
||||
x18VReg = regalloc.FromRealReg(x18, regalloc.RegTypeInt)
|
||||
x19VReg = regalloc.FromRealReg(x19, regalloc.RegTypeInt)
|
||||
x20VReg = regalloc.FromRealReg(x20, regalloc.RegTypeInt)
|
||||
x21VReg = regalloc.FromRealReg(x21, regalloc.RegTypeInt)
|
||||
x22VReg = regalloc.FromRealReg(x22, regalloc.RegTypeInt)
|
||||
x23VReg = regalloc.FromRealReg(x23, regalloc.RegTypeInt)
|
||||
x24VReg = regalloc.FromRealReg(x24, regalloc.RegTypeInt)
|
||||
x25VReg = regalloc.FromRealReg(x25, regalloc.RegTypeInt)
|
||||
x26VReg = regalloc.FromRealReg(x26, regalloc.RegTypeInt)
|
||||
x27VReg = regalloc.FromRealReg(x27, regalloc.RegTypeInt)
|
||||
x28VReg = regalloc.FromRealReg(x28, regalloc.RegTypeInt)
|
||||
x29VReg = regalloc.FromRealReg(x29, regalloc.RegTypeInt)
|
||||
x30VReg = regalloc.FromRealReg(x30, regalloc.RegTypeInt)
|
||||
v0VReg = regalloc.FromRealReg(v0, regalloc.RegTypeFloat)
|
||||
v1VReg = regalloc.FromRealReg(v1, regalloc.RegTypeFloat)
|
||||
v2VReg = regalloc.FromRealReg(v2, regalloc.RegTypeFloat)
|
||||
v3VReg = regalloc.FromRealReg(v3, regalloc.RegTypeFloat)
|
||||
v4VReg = regalloc.FromRealReg(v4, regalloc.RegTypeFloat)
|
||||
v5VReg = regalloc.FromRealReg(v5, regalloc.RegTypeFloat)
|
||||
v6VReg = regalloc.FromRealReg(v6, regalloc.RegTypeFloat)
|
||||
v7VReg = regalloc.FromRealReg(v7, regalloc.RegTypeFloat)
|
||||
v8VReg = regalloc.FromRealReg(v8, regalloc.RegTypeFloat)
|
||||
v9VReg = regalloc.FromRealReg(v9, regalloc.RegTypeFloat)
|
||||
v10VReg = regalloc.FromRealReg(v10, regalloc.RegTypeFloat)
|
||||
v11VReg = regalloc.FromRealReg(v11, regalloc.RegTypeFloat)
|
||||
v12VReg = regalloc.FromRealReg(v12, regalloc.RegTypeFloat)
|
||||
v13VReg = regalloc.FromRealReg(v13, regalloc.RegTypeFloat)
|
||||
v14VReg = regalloc.FromRealReg(v14, regalloc.RegTypeFloat)
|
||||
v15VReg = regalloc.FromRealReg(v15, regalloc.RegTypeFloat)
|
||||
v16VReg = regalloc.FromRealReg(v16, regalloc.RegTypeFloat)
|
||||
v17VReg = regalloc.FromRealReg(v17, regalloc.RegTypeFloat)
|
||||
v18VReg = regalloc.FromRealReg(v18, regalloc.RegTypeFloat)
|
||||
v19VReg = regalloc.FromRealReg(v19, regalloc.RegTypeFloat)
|
||||
v20VReg = regalloc.FromRealReg(v20, regalloc.RegTypeFloat)
|
||||
v21VReg = regalloc.FromRealReg(v21, regalloc.RegTypeFloat)
|
||||
v22VReg = regalloc.FromRealReg(v22, regalloc.RegTypeFloat)
|
||||
v23VReg = regalloc.FromRealReg(v23, regalloc.RegTypeFloat)
|
||||
v24VReg = regalloc.FromRealReg(v24, regalloc.RegTypeFloat)
|
||||
v25VReg = regalloc.FromRealReg(v25, regalloc.RegTypeFloat)
|
||||
v26VReg = regalloc.FromRealReg(v26, regalloc.RegTypeFloat)
|
||||
v27VReg = regalloc.FromRealReg(v27, regalloc.RegTypeFloat)
|
||||
// lr (link register) holds the return address at the function entry.
|
||||
lrVReg = x30VReg
|
||||
// tmpReg is used to perform spill/load on large stack offsets, and load large constants.
|
||||
// Therefore, be cautious to use this register in the middle of the compilation, especially before the register allocation.
|
||||
// This is the same as golang/go, but it's only described in the source code:
|
||||
// https://github.com/golang/go/blob/18e17e2cb12837ea2c8582ecdb0cc780f49a1aac/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go#L59
|
||||
// https://github.com/golang/go/blob/18e17e2cb12837ea2c8582ecdb0cc780f49a1aac/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go#L13-L15
|
||||
tmpRegVReg = regalloc.FromRealReg(tmp, regalloc.RegTypeInt)
|
||||
v28VReg = regalloc.FromRealReg(v28, regalloc.RegTypeFloat)
|
||||
v29VReg = regalloc.FromRealReg(v29, regalloc.RegTypeFloat)
|
||||
v30VReg = regalloc.FromRealReg(v30, regalloc.RegTypeFloat)
|
||||
v31VReg = regalloc.FromRealReg(v31, regalloc.RegTypeFloat)
|
||||
xzrVReg = regalloc.FromRealReg(xzr, regalloc.RegTypeInt)
|
||||
spVReg = regalloc.FromRealReg(sp, regalloc.RegTypeInt)
|
||||
fpVReg = regalloc.FromRealReg(fp, regalloc.RegTypeInt)
|
||||
)
|
||||
|
||||
var regNames = [...]string{
|
||||
x0: "x0",
|
||||
x1: "x1",
|
||||
x2: "x2",
|
||||
x3: "x3",
|
||||
x4: "x4",
|
||||
x5: "x5",
|
||||
x6: "x6",
|
||||
x7: "x7",
|
||||
x8: "x8",
|
||||
x9: "x9",
|
||||
x10: "x10",
|
||||
x11: "x11",
|
||||
x12: "x12",
|
||||
x13: "x13",
|
||||
x14: "x14",
|
||||
x15: "x15",
|
||||
x16: "x16",
|
||||
x17: "x17",
|
||||
x18: "x18",
|
||||
x19: "x19",
|
||||
x20: "x20",
|
||||
x21: "x21",
|
||||
x22: "x22",
|
||||
x23: "x23",
|
||||
x24: "x24",
|
||||
x25: "x25",
|
||||
x26: "x26",
|
||||
x27: "x27",
|
||||
x28: "x28",
|
||||
x29: "x29",
|
||||
x30: "x30",
|
||||
xzr: "xzr",
|
||||
sp: "sp",
|
||||
v0: "v0",
|
||||
v1: "v1",
|
||||
v2: "v2",
|
||||
v3: "v3",
|
||||
v4: "v4",
|
||||
v5: "v5",
|
||||
v6: "v6",
|
||||
v7: "v7",
|
||||
v8: "v8",
|
||||
v9: "v9",
|
||||
v10: "v10",
|
||||
v11: "v11",
|
||||
v12: "v12",
|
||||
v13: "v13",
|
||||
v14: "v14",
|
||||
v15: "v15",
|
||||
v16: "v16",
|
||||
v17: "v17",
|
||||
v18: "v18",
|
||||
v19: "v19",
|
||||
v20: "v20",
|
||||
v21: "v21",
|
||||
v22: "v22",
|
||||
v23: "v23",
|
||||
v24: "v24",
|
||||
v25: "v25",
|
||||
v26: "v26",
|
||||
v27: "v27",
|
||||
v28: "v28",
|
||||
v29: "v29",
|
||||
v30: "v30",
|
||||
v31: "v31",
|
||||
}
|
||||
|
||||
func formatVRegSized(r regalloc.VReg, size byte) (ret string) {
|
||||
if r.IsRealReg() {
|
||||
ret = regNames[r.RealReg()]
|
||||
switch ret[0] {
|
||||
case 'x':
|
||||
switch size {
|
||||
case 32:
|
||||
ret = strings.Replace(ret, "x", "w", 1)
|
||||
case 64:
|
||||
default:
|
||||
panic("BUG: invalid register size: " + strconv.Itoa(int(size)))
|
||||
}
|
||||
case 'v':
|
||||
switch size {
|
||||
case 32:
|
||||
ret = strings.Replace(ret, "v", "s", 1)
|
||||
case 64:
|
||||
ret = strings.Replace(ret, "v", "d", 1)
|
||||
case 128:
|
||||
ret = strings.Replace(ret, "v", "q", 1)
|
||||
default:
|
||||
panic("BUG: invalid register size")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
switch r.RegType() {
|
||||
case regalloc.RegTypeInt:
|
||||
switch size {
|
||||
case 32:
|
||||
ret = fmt.Sprintf("w%d?", r.ID())
|
||||
case 64:
|
||||
ret = fmt.Sprintf("x%d?", r.ID())
|
||||
default:
|
||||
panic("BUG: invalid register size: " + strconv.Itoa(int(size)))
|
||||
}
|
||||
case regalloc.RegTypeFloat:
|
||||
switch size {
|
||||
case 32:
|
||||
ret = fmt.Sprintf("s%d?", r.ID())
|
||||
case 64:
|
||||
ret = fmt.Sprintf("d%d?", r.ID())
|
||||
case 128:
|
||||
ret = fmt.Sprintf("q%d?", r.ID())
|
||||
default:
|
||||
panic("BUG: invalid register size")
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("BUG: invalid register type: %d for %s", r.RegType(), r))
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func formatVRegWidthVec(r regalloc.VReg, width vecArrangement) (ret string) {
|
||||
var id string
|
||||
wspec := strings.ToLower(width.String())
|
||||
if r.IsRealReg() {
|
||||
id = regNames[r.RealReg()][1:]
|
||||
} else {
|
||||
id = fmt.Sprintf("%d?", r.ID())
|
||||
}
|
||||
ret = fmt.Sprintf("%s%s", wspec, id)
|
||||
return
|
||||
}
|
||||
|
||||
func formatVRegVec(r regalloc.VReg, arr vecArrangement, index vecIndex) (ret string) {
|
||||
id := fmt.Sprintf("v%d?", r.ID())
|
||||
if r.IsRealReg() {
|
||||
id = regNames[r.RealReg()]
|
||||
}
|
||||
ret = fmt.Sprintf("%s.%s", id, strings.ToLower(arr.String()))
|
||||
if index != vecIndexNone {
|
||||
ret += fmt.Sprintf("[%d]", index)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func regTypeToRegisterSizeInBits(r regalloc.RegType) byte {
|
||||
switch r {
|
||||
case regalloc.RegTypeInt:
|
||||
return 64
|
||||
case regalloc.RegTypeFloat:
|
||||
return 128
|
||||
default:
|
||||
panic("BUG: invalid register type")
|
||||
}
|
||||
}
|
||||
|
||||
var regNumberInEncoding = [...]uint32{
|
||||
x0: 0,
|
||||
x1: 1,
|
||||
x2: 2,
|
||||
x3: 3,
|
||||
x4: 4,
|
||||
x5: 5,
|
||||
x6: 6,
|
||||
x7: 7,
|
||||
x8: 8,
|
||||
x9: 9,
|
||||
x10: 10,
|
||||
x11: 11,
|
||||
x12: 12,
|
||||
x13: 13,
|
||||
x14: 14,
|
||||
x15: 15,
|
||||
x16: 16,
|
||||
x17: 17,
|
||||
x18: 18,
|
||||
x19: 19,
|
||||
x20: 20,
|
||||
x21: 21,
|
||||
x22: 22,
|
||||
x23: 23,
|
||||
x24: 24,
|
||||
x25: 25,
|
||||
x26: 26,
|
||||
x27: 27,
|
||||
x28: 28,
|
||||
x29: 29,
|
||||
x30: 30,
|
||||
xzr: 31,
|
||||
sp: 31,
|
||||
v0: 0,
|
||||
v1: 1,
|
||||
v2: 2,
|
||||
v3: 3,
|
||||
v4: 4,
|
||||
v5: 5,
|
||||
v6: 6,
|
||||
v7: 7,
|
||||
v8: 8,
|
||||
v9: 9,
|
||||
v10: 10,
|
||||
v11: 11,
|
||||
v12: 12,
|
||||
v13: 13,
|
||||
v14: 14,
|
||||
v15: 15,
|
||||
v16: 16,
|
||||
v17: 17,
|
||||
v18: 18,
|
||||
v19: 19,
|
||||
v20: 20,
|
||||
v21: 21,
|
||||
v22: 22,
|
||||
v23: 23,
|
||||
v24: 24,
|
||||
v25: 25,
|
||||
v26: 26,
|
||||
v27: 27,
|
||||
v28: 28,
|
||||
v29: 29,
|
||||
v30: 30,
|
||||
v31: 31,
|
||||
}
|
90
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go
generated
vendored
Normal file
@ -0,0 +1,90 @@
|
||||
package arm64
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"reflect"
|
||||
"unsafe"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/wasmdebug"
|
||||
)
|
||||
|
||||
// UnwindStack implements wazevo.unwindStack.
|
||||
func UnwindStack(sp, _, top uintptr, returnAddresses []uintptr) []uintptr {
|
||||
l := int(top - sp)
|
||||
|
||||
var stackBuf []byte
|
||||
{
|
||||
// TODO: use unsafe.Slice after floor version is set to Go 1.20.
|
||||
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf))
|
||||
hdr.Data = sp
|
||||
hdr.Len = l
|
||||
hdr.Cap = l
|
||||
}
|
||||
|
||||
for i := uint64(0); i < uint64(l); {
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y | <----+
|
||||
// | ....... | |
|
||||
// | ret 0 | |
|
||||
// | arg X | | size_of_arg_ret
|
||||
// | ....... | |
|
||||
// | arg 1 | |
|
||||
// | arg 0 | <----+
|
||||
// | size_of_arg_ret |
|
||||
// | ReturnAddress |
|
||||
// +-----------------+ <----+
|
||||
// | ........... | |
|
||||
// | spill slot M | |
|
||||
// | ............ | |
|
||||
// | spill slot 2 | |
|
||||
// | spill slot 1 | | frame size
|
||||
// | spill slot 0       |  |
|
||||
// | clobbered N | |
|
||||
// | ............ | |
|
||||
// | clobbered 0 | <----+
|
||||
// | xxxxxx | ;; unused space to make it 16-byte aligned.
|
||||
// | frame_size |
|
||||
// +-----------------+ <---- SP
|
||||
// (low address)
|
||||
|
||||
frameSize := binary.LittleEndian.Uint64(stackBuf[i:])
|
||||
i += frameSize +
|
||||
16 // frame size + aligned space.
|
||||
retAddr := binary.LittleEndian.Uint64(stackBuf[i:])
|
||||
i += 8 // ret addr.
|
||||
sizeOfArgRet := binary.LittleEndian.Uint64(stackBuf[i:])
|
||||
i += 8 + sizeOfArgRet
|
||||
returnAddresses = append(returnAddresses, uintptr(retAddr))
|
||||
if len(returnAddresses) == wasmdebug.MaxFrames {
|
||||
break
|
||||
}
|
||||
}
|
||||
return returnAddresses
|
||||
}
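// Worked example of the walk above (illustrative values only): for a frame with frame_size = 32 and
// size_of_arg_ret = 8, one iteration consumes 32+16 bytes to reach the return address, 8 bytes for the
// return address itself, and 8+8 bytes for size_of_arg_ret plus the arg/ret area, i.e. 72 bytes in total.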
|
||||
|
||||
// GoCallStackView implements wazevo.goCallStackView.
|
||||
func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
|
||||
// (high address)
|
||||
// +-----------------+ <----+
|
||||
// | xxxxxxxxxxx | | ;; optional unused space to make it 16-byte aligned.
|
||||
// ^ | arg[N]/ret[M] | |
|
||||
// sliceSize | | ............ | | sliceSize
|
||||
// | | arg[1]/ret[1] | |
|
||||
// v | arg[0]/ret[0] | <----+
|
||||
// | sliceSize |
|
||||
// | frame_size |
|
||||
// +-----------------+ <---- stackPointerBeforeGoCall
|
||||
// (low address)
|
||||
ptr := unsafe.Pointer(stackPointerBeforeGoCall)
|
||||
size := *(*uint64)(unsafe.Add(ptr, 8))
|
||||
var view []uint64
|
||||
{
|
||||
sh := (*reflect.SliceHeader)(unsafe.Pointer(&view))
|
||||
sh.Data = uintptr(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize).
|
||||
sh.Len = int(size)
|
||||
sh.Cap = int(size)
|
||||
}
|
||||
return view
|
||||
}
|
100
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go
generated
vendored
Normal file
@ -0,0 +1,100 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
type (
|
||||
// Machine is a backend for a specific ISA machine.
|
||||
Machine interface {
|
||||
ExecutableContext() ExecutableContext
|
||||
|
||||
// DisableStackCheck disables the stack check for the current compilation for debugging/testing.
|
||||
DisableStackCheck()
|
||||
|
||||
// SetCurrentABI initializes the FunctionABI for the given signature.
|
||||
SetCurrentABI(abi *FunctionABI)
|
||||
|
||||
// SetCompiler sets the compilation context used for the lifetime of Machine.
|
||||
// This is only called once per Machine, i.e. before the first compilation.
|
||||
SetCompiler(Compiler)
|
||||
|
||||
// LowerSingleBranch is called when the compilation of the given single branch is started.
|
||||
LowerSingleBranch(b *ssa.Instruction)
|
||||
|
||||
// LowerConditionalBranch is called when the compilation of the given conditional branch is started.
|
||||
LowerConditionalBranch(b *ssa.Instruction)
|
||||
|
||||
// LowerInstr is called for each instruction in the given block except for the ones marked as already lowered
|
||||
// via Compiler.MarkLowered. The order is reverse, i.e. from the last instruction to the first one.
|
||||
//
|
||||
// Note: this can lower multiple instructions (which produce the inputs) at once whenever it's possible
|
||||
// for optimization.
|
||||
LowerInstr(*ssa.Instruction)
|
||||
|
||||
// Reset resets the machine state for the next compilation.
|
||||
Reset()
|
||||
|
||||
// InsertMove inserts a move instruction from src to dst whose type is typ.
|
||||
InsertMove(dst, src regalloc.VReg, typ ssa.Type)
|
||||
|
||||
// InsertReturn inserts the return instruction to return from the current function.
|
||||
InsertReturn()
|
||||
|
||||
// InsertLoadConstantBlockArg inserts the instruction(s) to load the constant value into the given regalloc.VReg.
|
||||
InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg)
|
||||
|
||||
// Format returns the string representation of the currently compiled machine code.
|
||||
// This is only for testing purpose.
|
||||
Format() string
|
||||
|
||||
// RegAlloc does the register allocation after lowering.
|
||||
RegAlloc()
|
||||
|
||||
// PostRegAlloc does the post register allocation, e.g. setting up prologue/epilogue, redundant move elimination, etc.
|
||||
PostRegAlloc()
|
||||
|
||||
// ResolveRelocations resolves the relocations after emitting machine code.
|
||||
// * refToBinaryOffset: the map from the function reference (ssa.FuncRef) to the executable offset.
|
||||
// * executable: the binary to resolve the relocations.
|
||||
// * relocations: the relocations to resolve.
|
||||
// * callTrampolineIslandOffsets: the offsets of the trampoline islands in the executable.
|
||||
ResolveRelocations(
|
||||
refToBinaryOffset []int,
|
||||
executable []byte,
|
||||
relocations []RelocationInfo,
|
||||
callTrampolineIslandOffsets []int,
|
||||
)
|
||||
|
||||
// Encode encodes the machine instructions to the Compiler.
|
||||
Encode(ctx context.Context) error
|
||||
|
||||
// CompileGoFunctionTrampoline compiles the trampoline function to call a Go function of the given exit code and signature.
|
||||
CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte
|
||||
|
||||
// CompileStackGrowCallSequence returns the sequence of instructions shared by all functions to
|
||||
// call the stack grow builtin function.
|
||||
CompileStackGrowCallSequence() []byte
|
||||
|
||||
// CompileEntryPreamble returns the sequence of instructions shared by multiple functions to
|
||||
// enter the function from Go.
|
||||
CompileEntryPreamble(signature *ssa.Signature) []byte
|
||||
|
||||
// LowerParams lowers the given parameters.
|
||||
LowerParams(params []ssa.Value)
|
||||
|
||||
// LowerReturns lowers the given returns.
|
||||
LowerReturns(returns []ssa.Value)
|
||||
|
||||
// ArgsResultsRegs returns the registers used for arguments and return values.
|
||||
ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg)
|
||||
|
||||
// CallTrampolineIslandInfo returns the interval of the offset where the trampoline island is placed, and
|
||||
// the size of the trampoline island. If islandSize is zero, the trampoline island is not used on this machine.
|
||||
CallTrampolineIslandInfo(numFunctions int) (interval, islandSize int, err error)
|
||||
}
|
||||
)
|
319
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go
generated
vendored
Normal file
@ -0,0 +1,319 @@
package backend

import (
  "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
  "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// RegAllocFunctionMachine is the interface for the machine-specific logic that will be used in RegAllocFunction.
type RegAllocFunctionMachine[I regalloc.InstrConstraint] interface {
  // InsertMoveBefore inserts the move instruction from src to dst before the given instruction.
  InsertMoveBefore(dst, src regalloc.VReg, instr I)
  // InsertStoreRegisterAt inserts the instruction(s) to store the given virtual register at the given instruction.
  // If after is true, the instruction(s) will be inserted after the given instruction, otherwise before.
  InsertStoreRegisterAt(v regalloc.VReg, instr I, after bool) I
  // InsertReloadRegisterAt inserts the instruction(s) to reload the given virtual register at the given instruction.
  // If after is true, the instruction(s) will be inserted after the given instruction, otherwise before.
  InsertReloadRegisterAt(v regalloc.VReg, instr I, after bool) I
  // ClobberedRegisters is called when the register allocation is done and the clobbered registers are known.
  ClobberedRegisters(regs []regalloc.VReg)
  // Swap swaps the two virtual registers after the given instruction.
  Swap(cur I, x1, x2, tmp regalloc.VReg)
  // LastInstrForInsertion implements LastInstrForInsertion of regalloc.Function. See its comment for details.
  LastInstrForInsertion(begin, end I) I
  // SSABlockLabel returns the label of the given ssa.BasicBlockID.
  SSABlockLabel(id ssa.BasicBlockID) Label
}

type (
  // RegAllocFunction implements regalloc.Function.
  RegAllocFunction[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct {
    m   m
    ssb ssa.Builder
    c   Compiler
    // iter is the iterator for reversePostOrderBlocks.
    iter                   int
    reversePostOrderBlocks []RegAllocBlock[I, m]
    // labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks.
    labelToRegAllocBlockIndex map[Label]int
    loopNestingForestRoots    []ssa.BasicBlock
  }

  // RegAllocBlock implements regalloc.Block.
  RegAllocBlock[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct {
    // f is the function this block belongs to. Used to reuse the regAllocFunctionImpl.predsSlice slice for Defs() and Uses().
    f                           *RegAllocFunction[I, m]
    sb                          ssa.BasicBlock
    l                           Label
    begin, end                  I
    loopNestingForestChildren   []ssa.BasicBlock
    cur                         I
    id                          int
    cachedLastInstrForInsertion I
  }
)

// NewRegAllocFunction returns a new RegAllocFunction.
func NewRegAllocFunction[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](m M, ssb ssa.Builder, c Compiler) *RegAllocFunction[I, M] {
  return &RegAllocFunction[I, M]{
    m:                         m,
    ssb:                       ssb,
    c:                         c,
    labelToRegAllocBlockIndex: make(map[Label]int),
  }
}

// AddBlock adds a new block to the function.
func (f *RegAllocFunction[I, M]) AddBlock(sb ssa.BasicBlock, l Label, begin, end I) {
  i := len(f.reversePostOrderBlocks)
  f.reversePostOrderBlocks = append(f.reversePostOrderBlocks, RegAllocBlock[I, M]{
    f:     f,
    sb:    sb,
    l:     l,
    begin: begin,
    end:   end,
    id:    int(sb.ID()),
  })
  f.labelToRegAllocBlockIndex[l] = i
}

// Reset resets the function for the next compilation.
func (f *RegAllocFunction[I, M]) Reset() {
  f.reversePostOrderBlocks = f.reversePostOrderBlocks[:0]
  f.iter = 0
}

// StoreRegisterAfter implements regalloc.Function StoreRegisterAfter.
func (f *RegAllocFunction[I, M]) StoreRegisterAfter(v regalloc.VReg, instr regalloc.Instr) {
  m := f.m
  m.InsertStoreRegisterAt(v, instr.(I), true)
}

// ReloadRegisterBefore implements regalloc.Function ReloadRegisterBefore.
func (f *RegAllocFunction[I, M]) ReloadRegisterBefore(v regalloc.VReg, instr regalloc.Instr) {
  m := f.m
  m.InsertReloadRegisterAt(v, instr.(I), false)
}

// ReloadRegisterAfter implements regalloc.Function ReloadRegisterAfter.
func (f *RegAllocFunction[I, M]) ReloadRegisterAfter(v regalloc.VReg, instr regalloc.Instr) {
  m := f.m
  m.InsertReloadRegisterAt(v, instr.(I), true)
}

// StoreRegisterBefore implements regalloc.Function StoreRegisterBefore.
func (f *RegAllocFunction[I, M]) StoreRegisterBefore(v regalloc.VReg, instr regalloc.Instr) {
  m := f.m
  m.InsertStoreRegisterAt(v, instr.(I), false)
}

// ClobberedRegisters implements regalloc.Function ClobberedRegisters.
func (f *RegAllocFunction[I, M]) ClobberedRegisters(regs []regalloc.VReg) {
  f.m.ClobberedRegisters(regs)
}

// SwapBefore implements regalloc.Function SwapBefore.
func (f *RegAllocFunction[I, M]) SwapBefore(x1, x2, tmp regalloc.VReg, instr regalloc.Instr) {
  f.m.Swap(instr.Prev().(I), x1, x2, tmp)
}

// PostOrderBlockIteratorBegin implements regalloc.Function PostOrderBlockIteratorBegin.
func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorBegin() regalloc.Block {
  f.iter = len(f.reversePostOrderBlocks) - 1
  return f.PostOrderBlockIteratorNext()
}

// PostOrderBlockIteratorNext implements regalloc.Function PostOrderBlockIteratorNext.
func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorNext() regalloc.Block {
  if f.iter < 0 {
    return nil
  }
  b := &f.reversePostOrderBlocks[f.iter]
  f.iter--
  return b
}

// ReversePostOrderBlockIteratorBegin implements regalloc.Function ReversePostOrderBlockIteratorBegin.
func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorBegin() regalloc.Block {
  f.iter = 0
  return f.ReversePostOrderBlockIteratorNext()
}

// ReversePostOrderBlockIteratorNext implements regalloc.Function ReversePostOrderBlockIteratorNext.
func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorNext() regalloc.Block {
  if f.iter >= len(f.reversePostOrderBlocks) {
    return nil
  }
  b := &f.reversePostOrderBlocks[f.iter]
  f.iter++
  return b
}

// LoopNestingForestRoots implements regalloc.Function LoopNestingForestRoots.
func (f *RegAllocFunction[I, M]) LoopNestingForestRoots() int {
  f.loopNestingForestRoots = f.ssb.LoopNestingForestRoots()
  return len(f.loopNestingForestRoots)
}

// LoopNestingForestRoot implements regalloc.Function LoopNestingForestRoot.
func (f *RegAllocFunction[I, M]) LoopNestingForestRoot(i int) regalloc.Block {
  blk := f.loopNestingForestRoots[i]
  l := f.m.SSABlockLabel(blk.ID())
  index := f.labelToRegAllocBlockIndex[l]
  return &f.reversePostOrderBlocks[index]
}

// InsertMoveBefore implements regalloc.Function InsertMoveBefore.
func (f *RegAllocFunction[I, M]) InsertMoveBefore(dst, src regalloc.VReg, instr regalloc.Instr) {
  f.m.InsertMoveBefore(dst, src, instr.(I))
}

// LowestCommonAncestor implements regalloc.Function LowestCommonAncestor.
func (f *RegAllocFunction[I, M]) LowestCommonAncestor(blk1, blk2 regalloc.Block) regalloc.Block {
  ret := f.ssb.LowestCommonAncestor(blk1.(*RegAllocBlock[I, M]).sb, blk2.(*RegAllocBlock[I, M]).sb)
  l := f.m.SSABlockLabel(ret.ID())
  index := f.labelToRegAllocBlockIndex[l]
  return &f.reversePostOrderBlocks[index]
}

// Idom implements regalloc.Function Idom.
func (f *RegAllocFunction[I, M]) Idom(blk regalloc.Block) regalloc.Block {
  builder := f.ssb
  idom := builder.Idom(blk.(*RegAllocBlock[I, M]).sb)
  if idom == nil {
    panic("BUG: idom must not be nil")
  }
  l := f.m.SSABlockLabel(idom.ID())
  index := f.labelToRegAllocBlockIndex[l]
  return &f.reversePostOrderBlocks[index]
}

// ID implements regalloc.Block.
func (r *RegAllocBlock[I, m]) ID() int32 { return int32(r.id) }

// BlockParams implements regalloc.Block.
func (r *RegAllocBlock[I, m]) BlockParams(regs *[]regalloc.VReg) []regalloc.VReg {
  c := r.f.c
  *regs = (*regs)[:0]
  for i := 0; i < r.sb.Params(); i++ {
    v := c.VRegOf(r.sb.Param(i))
    *regs = append(*regs, v)
  }
  return *regs
}

// InstrIteratorBegin implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrIteratorBegin() regalloc.Instr {
  r.cur = r.begin
  return r.cur
}

// InstrIteratorNext implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrIteratorNext() regalloc.Instr {
  for {
    if r.cur == r.end {
      return nil
    }
    instr := r.cur.Next()
    r.cur = instr.(I)
    if instr == nil {
      return nil
    } else if instr.AddedBeforeRegAlloc() {
      // Only concerned about the instructions added before regalloc.
      return instr
    }
  }
}

// InstrRevIteratorBegin implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrRevIteratorBegin() regalloc.Instr {
  r.cur = r.end
  return r.cur
}

// InstrRevIteratorNext implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrRevIteratorNext() regalloc.Instr {
  for {
    if r.cur == r.begin {
      return nil
    }
    instr := r.cur.Prev()
    r.cur = instr.(I)
    if instr == nil {
      return nil
    } else if instr.AddedBeforeRegAlloc() {
      // Only concerned about the instructions added before regalloc.
      return instr
    }
  }
}

// FirstInstr implements regalloc.Block.
func (r *RegAllocBlock[I, m]) FirstInstr() regalloc.Instr {
  return r.begin
}

// EndInstr implements regalloc.Block.
func (r *RegAllocBlock[I, m]) EndInstr() regalloc.Instr {
  return r.end
}

// LastInstrForInsertion implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LastInstrForInsertion() regalloc.Instr {
  var nil I // Shadows the predeclared nil with I's zero value, so the comparison below checks against the zero value of the generic instruction type.
  if r.cachedLastInstrForInsertion == nil {
    r.cachedLastInstrForInsertion = r.f.m.LastInstrForInsertion(r.begin, r.end)
  }
  return r.cachedLastInstrForInsertion
}

// Preds implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Preds() int { return r.sb.Preds() }

// Pred implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Pred(i int) regalloc.Block {
  sb := r.sb
  pred := sb.Pred(i)
  l := r.f.m.SSABlockLabel(pred.ID())
  index := r.f.labelToRegAllocBlockIndex[l]
  return &r.f.reversePostOrderBlocks[index]
}

// Entry implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Entry() bool { return r.sb.EntryBlock() }

// Succs implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Succs() int {
  return r.sb.Succs()
}

// Succ implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Succ(i int) regalloc.Block {
  sb := r.sb
  succ := sb.Succ(i)
  if succ.ReturnBlock() {
    return nil
  }
  l := r.f.m.SSABlockLabel(succ.ID())
  index := r.f.labelToRegAllocBlockIndex[l]
  return &r.f.reversePostOrderBlocks[index]
}

// LoopHeader implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LoopHeader() bool {
  return r.sb.LoopHeader()
}

// LoopNestingForestChildren implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LoopNestingForestChildren() int {
  r.loopNestingForestChildren = r.sb.LoopNestingForestChildren()
  return len(r.loopNestingForestChildren)
}

// LoopNestingForestChild implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LoopNestingForestChild(i int) regalloc.Block {
  blk := r.loopNestingForestChildren[i]
  l := r.f.m.SSABlockLabel(blk.ID())
  index := r.f.labelToRegAllocBlockIndex[l]
  return &r.f.reversePostOrderBlocks[index]
}
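The helper below is not part of the vendored file above; it is a minimal sketch, assuming it sits in the same backend package, of how the iterator contract of RegAllocFunction is meant to be consumed: AddBlock is called once per lowered block in reverse post-order, so draining the post-order iterator yields the blocks from the last one added back to the first.

package backend

import "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"

// postOrderBlocks drains the post-order iterator of a RegAllocFunction into a slice.
func postOrderBlocks[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](f *RegAllocFunction[I, M]) []regalloc.Block {
  var blocks []regalloc.Block
  // PostOrderBlockIteratorNext returns nil once the iterator runs past the first block added.
  for b := f.PostOrderBlockIteratorBegin(); b != nil; b = f.PostOrderBlockIteratorNext() {
    blocks = append(blocks, b)
  }
  return blocks
}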
136
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go
generated
vendored
Normal file
@ -0,0 +1,136 @@
package regalloc

import "fmt"

// These interfaces are implemented by ISA-specific backends to abstract away the details, and allow the register
// allocators to work on any ISA.
//
// TODO: the interfaces are not stabilized yet, especially x64 will need some changes. E.g. x64 has an addressing mode
// where index can be in memory. That kind of info will be useful to reduce the register pressure, and should be leveraged
// by the register allocators, like https://docs.rs/regalloc2/latest/regalloc2/enum.OperandConstraint.html

type (
  // Function is the top-level interface to do register allocation, which corresponds to a CFG containing
  // Block(s).
  Function interface {
    // PostOrderBlockIteratorBegin returns the first block in the post-order traversal of the CFG.
    // In other words, the last blocks in the CFG will be returned first.
    PostOrderBlockIteratorBegin() Block
    // PostOrderBlockIteratorNext returns the next block in the post-order traversal of the CFG.
    PostOrderBlockIteratorNext() Block
    // ReversePostOrderBlockIteratorBegin returns the first block in the reverse post-order traversal of the CFG.
    // In other words, the first blocks in the CFG will be returned first.
    ReversePostOrderBlockIteratorBegin() Block
    // ReversePostOrderBlockIteratorNext returns the next block in the reverse post-order traversal of the CFG.
    ReversePostOrderBlockIteratorNext() Block
    // ClobberedRegisters reports the registers clobbered by this function.
    ClobberedRegisters([]VReg)
    // LoopNestingForestRoots returns the number of roots of the loop nesting forest in a function.
    LoopNestingForestRoots() int
    // LoopNestingForestRoot returns the i-th root of the loop nesting forest in a function.
    LoopNestingForestRoot(i int) Block
    // LowestCommonAncestor returns the lowest common ancestor of two blocks in the dominator tree.
    LowestCommonAncestor(blk1, blk2 Block) Block
    // Idom returns the immediate dominator of the given block.
    Idom(blk Block) Block

    // The following methods are for rewriting the function.

    // SwapBefore swaps the two virtual registers before the given instruction.
    SwapBefore(x1, x2, tmp VReg, instr Instr)
    // StoreRegisterBefore inserts store instruction(s) before the given instruction for the given virtual register.
    StoreRegisterBefore(v VReg, instr Instr)
    // StoreRegisterAfter inserts store instruction(s) after the given instruction for the given virtual register.
    StoreRegisterAfter(v VReg, instr Instr)
    // ReloadRegisterBefore inserts reload instruction(s) before the given instruction for the given virtual register.
    ReloadRegisterBefore(v VReg, instr Instr)
    // ReloadRegisterAfter inserts reload instruction(s) after the given instruction for the given virtual register.
    ReloadRegisterAfter(v VReg, instr Instr)
    // InsertMoveBefore inserts move instruction(s) before the given instruction for the given virtual registers.
    InsertMoveBefore(dst, src VReg, instr Instr)
  }

  // Block is a basic block in the CFG of a function; it consists of multiple instructions and predecessor Block(s).
  Block interface {
    // ID returns the unique identifier of this block, which is ordered in the reverse post-order traversal of the CFG.
    ID() int32
    // BlockParams returns the virtual registers used as the parameters of this block.
    BlockParams(*[]VReg) []VReg
    // InstrIteratorBegin returns the first instruction in this block. Instructions added after lowering must be skipped.
    // Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the returned Instr.
    InstrIteratorBegin() Instr
    // InstrIteratorNext returns the next instruction in this block. Instructions added after lowering must be skipped.
    // Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the returned Instr.
    InstrIteratorNext() Instr
    // InstrRevIteratorBegin is the same as InstrIteratorBegin, but in the reverse order.
    InstrRevIteratorBegin() Instr
    // InstrRevIteratorNext is the same as InstrIteratorNext, but in the reverse order.
    InstrRevIteratorNext() Instr
    // FirstInstr returns the first instruction in this block, where instructions will be inserted after it.
    FirstInstr() Instr
    // EndInstr returns the end instruction in this block.
    EndInstr() Instr
    // LastInstrForInsertion returns the last instruction in this block, where instructions will be inserted before it.
    // Such insertions only happen when we need to insert spill/reload instructions to adjust the merge edges.
    // At the time of register allocation, all the critical edges are already split, so there is no need
    // to worry about the case where a branching instruction has multiple successors.
    // Therefore, usually, it is the nop instruction, but if the block ends with an unconditional branch, then it returns
    // the unconditional branch, not the nop. In other words, it is either a nop or an unconditional branch.
    LastInstrForInsertion() Instr
    // Preds returns the number of predecessors of this block in the CFG.
    Preds() int
    // Pred returns the i-th predecessor of this block in the CFG.
    Pred(i int) Block
    // Entry returns true if this block is the entry block.
    Entry() bool
    // Succs returns the number of successors of this block in the CFG.
    Succs() int
    // Succ returns the i-th successor of this block in the CFG.
    Succ(i int) Block
    // LoopHeader returns true if this block is a loop header.
    LoopHeader() bool
    // LoopNestingForestChildren returns the number of children of this block in the loop nesting forest.
    LoopNestingForestChildren() int
    // LoopNestingForestChild returns the i-th child of this block in the loop nesting forest.
    LoopNestingForestChild(i int) Block
  }

  // Instr is an instruction in a block, abstracting away the underlying ISA.
  Instr interface {
    fmt.Stringer
    // Next returns the next instruction in the same block.
    Next() Instr
    // Prev returns the previous instruction in the same block.
    Prev() Instr
    // Defs returns the virtual registers defined by this instruction.
    Defs(*[]VReg) []VReg
    // Uses returns the virtual registers used by this instruction.
    // Note: multiple returned []VReg will not be held at the same time, so it's safe to use the same slice for this.
    Uses(*[]VReg) []VReg
    // AssignUse assigns the RealReg-allocated virtual register used by this instruction at the given index.
    AssignUse(index int, v VReg)
    // AssignDef assigns a RealReg-allocated virtual register defined by this instruction.
    // This only accepts one register because we don't allocate registers for multi-def instructions (i.e. call instructions).
    AssignDef(VReg)
    // IsCopy returns true if this instruction is a move instruction between two registers.
    // If true, the instruction is of the form dst = src, and if src and dst do not interfere with each other,
    // we could coalesce them, and hence the copy can be eliminated from the final code.
    IsCopy() bool
    // IsCall returns true if this instruction is a call instruction. The result is used to insert
    // caller-saved register spills and restores.
    IsCall() bool
    // IsIndirectCall returns true if this instruction is an indirect call instruction which calls a function pointer.
    // The result is used to insert caller-saved register spills and restores.
    IsIndirectCall() bool
    // IsReturn returns true if this instruction is a return instruction.
    IsReturn() bool
    // AddedBeforeRegAlloc returns true if this instruction was added before register allocation.
    AddedBeforeRegAlloc() bool
  }

  // InstrConstraint is an interface for arch-specific instruction constraints.
  InstrConstraint interface {
    comparable
    Instr
  }
)
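The sketch below is not part of the vendored file; assuming it lives in the regalloc package, it shows how the Block/Instr iterator contract above is intended to be used, and why Defs and Uses can share one scratch slice: the returned slices are never held across calls, and append copies the elements out before the next call.

package regalloc

// collectVRegs walks a Block front to back and gathers the virtual registers
// its instructions define and use. A single scratch slice is reused for the
// Defs/Uses calls; the results are copied out via append before the next call.
func collectVRegs(b Block) (defs, uses []VReg) {
  var scratch []VReg
  for instr := b.InstrIteratorBegin(); instr != nil; instr = b.InstrIteratorNext() {
    defs = append(defs, instr.Defs(&scratch)...)
    uses = append(uses, instr.Uses(&scratch)...)
  }
  return defs, uses
}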
123
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/reg.go
generated
vendored
Normal file
@ -0,0 +1,123 @@
package regalloc

import (
  "fmt"

  "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// VReg represents a register which is assigned to an SSA value. This is used to represent a register in the backend.
// A VReg may or may not be backed by a physical register, and the physical register, if any, can be obtained via RealReg.
type VReg uint64

// VRegID is the lower 32 bits of VReg, which is the pure identifier of the VReg without RealReg info.
type VRegID uint32

// RealReg returns the RealReg of this VReg.
func (v VReg) RealReg() RealReg {
  return RealReg(v >> 32)
}

// IsRealReg returns true if this VReg is backed by a physical register.
func (v VReg) IsRealReg() bool {
  return v.RealReg() != RealRegInvalid
}

// FromRealReg returns a VReg from the given RealReg and RegType.
// This is used to represent a specific pre-colored register in the backend.
func FromRealReg(r RealReg, typ RegType) VReg {
  rid := VRegID(r)
  if rid > vRegIDReservedForRealNum {
    panic(fmt.Sprintf("invalid real reg %d", r))
  }
  return VReg(r).SetRealReg(r).SetRegType(typ)
}

// SetRealReg sets the RealReg of this VReg and returns the updated VReg.
func (v VReg) SetRealReg(r RealReg) VReg {
  return VReg(r)<<32 | (v & 0xff_00_ffffffff)
}

// RegType returns the RegType of this VReg.
func (v VReg) RegType() RegType {
  return RegType(v >> 40)
}

// SetRegType sets the RegType of this VReg and returns the updated VReg.
func (v VReg) SetRegType(t RegType) VReg {
  return VReg(t)<<40 | (v & 0x00_ff_ffffffff)
}

// ID returns the VRegID of this VReg.
func (v VReg) ID() VRegID {
  return VRegID(v & 0xffffffff)
}

// Valid returns true if this VReg is valid.
func (v VReg) Valid() bool {
  return v.ID() != vRegIDInvalid && v.RegType() != RegTypeInvalid
}

// RealReg represents a physical register.
type RealReg byte

const RealRegInvalid RealReg = 0

const (
  vRegIDInvalid            VRegID = 1 << 31
  VRegIDNonReservedBegin          = vRegIDReservedForRealNum
  vRegIDReservedForRealNum VRegID = 128
  VRegInvalid                     = VReg(vRegIDInvalid)
)

// String implements fmt.Stringer.
func (r RealReg) String() string {
  switch r {
  case RealRegInvalid:
    return "invalid"
  default:
    return fmt.Sprintf("r%d", r)
  }
}

// String implements fmt.Stringer.
func (v VReg) String() string {
  if v.IsRealReg() {
    return fmt.Sprintf("r%d", v.ID())
  }
  return fmt.Sprintf("v%d?", v.ID())
}

// RegType represents the type of a register.
type RegType byte

const (
  RegTypeInvalid RegType = iota
  RegTypeInt
  RegTypeFloat
  NumRegType
)

// String implements fmt.Stringer.
func (r RegType) String() string {
  switch r {
  case RegTypeInt:
    return "int"
  case RegTypeFloat:
    return "float"
  default:
    return "invalid"
  }
}

// RegTypeOf returns the RegType of the given ssa.Type.
func RegTypeOf(p ssa.Type) RegType {
  switch p {
  case ssa.TypeI32, ssa.TypeI64:
    return RegTypeInt
  case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
    return RegTypeFloat
  default:
    panic("invalid type")
  }
}
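The test-style sketch below is not part of the vendored file; assuming it is placed in the regalloc package, it spells out the bit layout implied by the accessors above: bits 0-31 hold the VRegID, bits 32-39 the RealReg, and bits 40-47 the RegType.

package regalloc

import (
  "testing"

  "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

func TestVRegPackingSketch(t *testing.T) {
  // A pre-colored (real) register keeps its ID, RealReg and RegType in one uint64.
  v := FromRealReg(RealReg(3), RegTypeInt)
  if v.ID() != 3 || v.RealReg() != RealReg(3) || v.RegType() != RegTypeInt || !v.IsRealReg() {
    t.Fatalf("unexpected packing: %s", v)
  }
  // A purely virtual register has no RealReg but is still Valid once typed.
  w := VReg(100).SetRegType(RegTypeFloat)
  if w.IsRealReg() || !w.Valid() || w.ID() != 100 {
    t.Fatalf("unexpected virtual VReg: %s", w)
  }
  // RegTypeOf maps ssa types onto the two register classes.
  if RegTypeOf(ssa.TypeI64) != RegTypeInt || RegTypeOf(ssa.TypeV128) != RegTypeFloat {
    t.Fatalf("unexpected RegTypeOf mapping")
  }
}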
1212
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
generated
vendored
Normal file
File diff suppressed because it is too large
108
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
generated
vendored
Normal file
@ -0,0 +1,108 @@
package regalloc

import (
  "fmt"
  "strings"
)

// NewRegSet returns a new RegSet with the given registers.
func NewRegSet(regs ...RealReg) RegSet {
  var ret RegSet
  for _, r := range regs {
    ret = ret.add(r)
  }
  return ret
}

// RegSet represents a set of registers.
type RegSet uint64

func (rs RegSet) format(info *RegisterInfo) string { //nolint:unused
  var ret []string
  for i := 0; i < 64; i++ {
    if rs&(1<<uint(i)) != 0 {
      ret = append(ret, info.RealRegName(RealReg(i)))
    }
  }
  return strings.Join(ret, ", ")
}

func (rs RegSet) has(r RealReg) bool {
  return rs&(1<<uint(r)) != 0
}

func (rs RegSet) add(r RealReg) RegSet {
  if r >= 64 {
    return rs
  }
  return rs | 1<<uint(r)
}

func (rs RegSet) Range(f func(allocatedRealReg RealReg)) {
  for i := 0; i < 64; i++ {
    if rs&(1<<uint(i)) != 0 {
      f(RealReg(i))
    }
  }
}

type regInUseSet struct {
  set RegSet
  vrs [64]VReg
}

func (rs *regInUseSet) reset() {
  rs.set = 0
  for i := range rs.vrs {
    rs.vrs[i] = VRegInvalid
  }
}

func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
  var ret []string
  for i := 0; i < 64; i++ {
    if rs.set&(1<<uint(i)) != 0 {
      vr := rs.vrs[i]
      ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.ID()))
    }
  }
  return strings.Join(ret, ", ")
}

func (rs *regInUseSet) has(r RealReg) bool {
  if r >= 64 {
    return false
  }
  return rs.set&(1<<uint(r)) != 0
}

func (rs *regInUseSet) get(r RealReg) VReg {
  if r >= 64 {
    return VRegInvalid
  }
  return rs.vrs[r]
}

func (rs *regInUseSet) remove(r RealReg) {
  if r >= 64 {
    return
  }
  rs.set &= ^(1 << uint(r))
  rs.vrs[r] = VRegInvalid
}

func (rs *regInUseSet) add(r RealReg, vr VReg) {
  if r >= 64 {
    return
  }
  rs.set |= 1 << uint(r)
  rs.vrs[r] = vr
}

func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) {
  for i := 0; i < 64; i++ {
    if rs.set&(1<<uint(i)) != 0 {
      f(RealReg(i), rs.vrs[i])
    }
  }
}
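The test-style sketch below is not part of the vendored file; assuming it is placed in the regalloc package, it shows the RegSet contract above: each RealReg in 0..63 occupies one bit, and Range visits the set bits in ascending order.

package regalloc

import "testing"

func TestRegSetSketch(t *testing.T) {
  // Insertion order does not matter; the bitset keeps registers sorted by index.
  rs := NewRegSet(RealReg(4), RealReg(1), RealReg(63))
  var got []RealReg
  rs.Range(func(r RealReg) { got = append(got, r) })
  want := []RealReg{1, 4, 63}
  if len(got) != len(want) {
    t.Fatalf("got %v, want %v", got, want)
  }
  for i := range want {
    if got[i] != want[i] {
      t.Fatalf("got %v, want %v", got, want)
    }
  }
}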
43
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/vdef.go
generated
vendored
Normal file
@ -0,0 +1,43 @@
package backend

import (
  "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
  "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// SSAValueDefinition represents a definition of an SSA value.
type SSAValueDefinition struct {
  // BlockParamValue is valid if Instr == nil
  BlockParamValue ssa.Value

  // BlkParamVReg is valid if Instr == nil
  BlkParamVReg regalloc.VReg

  // Instr is not nil if this is a definition from an instruction.
  Instr *ssa.Instruction
  // N is the index of the return value in the instr's return values list.
  N int
  // RefCount is the number of references to the result.
  RefCount int
}

func (d *SSAValueDefinition) IsFromInstr() bool {
  return d.Instr != nil
}

func (d *SSAValueDefinition) IsFromBlockParam() bool {
  return d.Instr == nil
}

func (d *SSAValueDefinition) SSAValue() ssa.Value {
  if d.IsFromBlockParam() {
    return d.BlockParamValue
  } else {
    r, rs := d.Instr.Returns()
    if d.N == 0 {
      return r
    } else {
      return rs[d.N-1]
    }
  }
}
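The helper below is not part of the vendored file; it is a sketch, assuming it sits in the backend package, of the dispatch SSAValueDefinition is designed for: block parameters already carry their VReg in BlkParamVReg, while instruction results are resolved to an ssa.Value via SSAValue and then looked up through Compiler.VRegOf (used the same way in regalloc.go above).

package backend

import "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"

// vregOfDef returns the virtual register backing an SSA value definition.
func vregOfDef(c Compiler, d *SSAValueDefinition) regalloc.VReg {
  if d.IsFromBlockParam() {
    // Block parameters are pre-assigned a VReg when the block is lowered.
    return d.BlkParamVReg
  }
  // Instruction results: pick the N-th return value and ask the Compiler for its VReg.
  return c.VRegOf(d.SSAValue())
}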