mirror of https://github.com/superseriousbusiness/gotosocial
synced 2025-06-05 21:59:39 +02:00
[chore] Upgrade wasm-sqlite to v0.16.2 (#2997)
@@ -43,7 +43,7 @@ type ExecutableContextT[Instr any] struct {
 	labelPositionPool wazevoapi.Pool[LabelPosition[Instr]]
 	NextLabel         Label
 	// LabelPositions maps a label to the instructions of the region which the label represents.
-	LabelPositions map[Label]*LabelPosition[Instr]
+	LabelPositions     []*LabelPosition[Instr]
 	OrderedBlockLabels []*LabelPosition[Instr]

 	// PerBlockHead and PerBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock.
@@ -67,7 +67,6 @@ func NewExecutableContextT[Instr any](
 		setNext:           setNext,
 		setPrev:           setPrev,
 		labelPositionPool: wazevoapi.NewPool[LabelPosition[Instr]](resetLabelPosition[Instr]),
-		LabelPositions:    make(map[Label]*LabelPosition[Instr]),
 		NextLabel:         LabelInvalid,
 	}
 }
@@ -97,11 +96,7 @@ func (e *ExecutableContextT[Instr]) StartBlock(blk ssa.BasicBlock) {
 	end := e.allocateNop0()
 	e.PerBlockHead, e.PerBlockEnd = end, end

-	labelPos, ok := e.LabelPositions[l]
-	if !ok {
-		labelPos = e.AllocateLabelPosition(l)
-		e.LabelPositions[l] = labelPos
-	}
+	labelPos := e.GetOrAllocateLabelPosition(l)
 	e.OrderedBlockLabels = append(e.OrderedBlockLabels, labelPos)
 	labelPos.Begin, labelPos.End = end, end
 	labelPos.SB = blk
@@ -146,8 +141,8 @@ func (e *ExecutableContextT[T]) FlushPendingInstructions() {
 func (e *ExecutableContextT[T]) Reset() {
 	e.labelPositionPool.Reset()
 	e.InstructionPool.Reset()
-	for l := Label(0); l <= e.NextLabel; l++ {
-		delete(e.LabelPositions, l)
+	for i := range e.LabelPositions {
+		e.LabelPositions[i] = nil
 	}
 	e.PendingInstructions = e.PendingInstructions[:0]
 	e.OrderedBlockLabels = e.OrderedBlockLabels[:0]
@@ -163,10 +158,17 @@ func (e *ExecutableContextT[T]) AllocateLabel() Label {
 	return e.NextLabel
 }

-func (e *ExecutableContextT[T]) AllocateLabelPosition(la Label) *LabelPosition[T] {
-	l := e.labelPositionPool.Allocate()
-	l.L = la
-	return l
+func (e *ExecutableContextT[T]) GetOrAllocateLabelPosition(l Label) *LabelPosition[T] {
+	if len(e.LabelPositions) <= int(l) {
+		e.LabelPositions = append(e.LabelPositions, make([]*LabelPosition[T], int(l)+1-len(e.LabelPositions))...)
+	}
+	ret := e.LabelPositions[l]
+	if ret == nil {
+		ret = e.labelPositionPool.Allocate()
+		ret.L = l
+		e.LabelPositions[l] = ret
+	}
+	return ret
 }

 func (e *ExecutableContextT[T]) GetOrAllocateSSABlockLabel(blk ssa.BasicBlock) Label {
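The hunks above replace ExecutableContextT's map[Label]*LabelPosition with a nil-padded slice indexed by the label's integer value, grown on demand. A minimal, self-contained sketch of that get-or-allocate idiom follows; the types are simplified stand-ins for wazevo's Label/LabelPosition, not the vendored code:

package main

import "fmt"

// labelPos is a simplified stand-in for wazevo's LabelPosition.
type labelPos struct{ l int }

// getOrAllocate grows positions so index l is valid, then returns the existing
// entry or fills in a fresh one, the same shape as GetOrAllocateLabelPosition
// above: labels are small dense integers, so a nil-padded slice replaces the map.
func getOrAllocate(positions []*labelPos, l int) ([]*labelPos, *labelPos) {
	if len(positions) <= l {
		positions = append(positions, make([]*labelPos, l+1-len(positions))...)
	}
	ret := positions[l]
	if ret == nil {
		ret = &labelPos{l: l}
		positions[l] = ret
	}
	return positions, ret
}

func main() {
	var positions []*labelPos
	positions, p := getOrAllocate(positions, 5)
	fmt.Println(len(positions), p.l) // 6 5
	_, again := getOrAllocate(positions, 5)
	fmt.Println(p == again) // true: the second lookup reuses the same entry
}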
16 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go generated vendored
@@ -1906,8 +1906,10 @@ func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) {
 func (m *machine) Format() string {
 	ectx := m.ectx
 	begins := map[*instruction]backend.Label{}
-	for l, pos := range ectx.LabelPositions {
-		begins[pos.Begin] = l
+	for _, pos := range ectx.LabelPositions {
+		if pos != nil {
+			begins[pos.Begin] = pos.L
+		}
 	}

 	irBlocks := map[backend.Label]ssa.BasicBlockID{}
@@ -1950,7 +1952,10 @@ func (m *machine) encodeWithoutSSA(root *instruction) {
 		offset := int64(len(*bufPtr))
 		if cur.kind == nop0 {
 			l := cur.nop0Label()
-			if pos, ok := ectx.LabelPositions[l]; ok {
+			if int(l) >= len(ectx.LabelPositions) {
+				continue
+			}
+			if pos := ectx.LabelPositions[l]; pos != nil {
 				pos.BinaryOffset = offset
 			}
 		}
@@ -2005,7 +2010,7 @@ func (m *machine) Encode(ctx context.Context) (err error) {
 		switch cur.kind {
 		case nop0:
 			l := cur.nop0Label()
-			if pos, ok := ectx.LabelPositions[l]; ok {
+			if pos := ectx.LabelPositions[l]; pos != nil {
 				pos.BinaryOffset = offset
 			}
 		case sourceOffsetInfo:
@@ -2165,8 +2170,7 @@ func (m *machine) allocateBrTarget() (nop *instruction, l backend.Label) { //nol
 func (m *machine) allocateLabel() *labelPosition {
 	ectx := m.ectx
 	l := ectx.AllocateLabel()
-	pos := ectx.AllocateLabelPosition(l)
-	ectx.LabelPositions[l] = pos
+	pos := ectx.GetOrAllocateLabelPosition(l)
 	return pos
 }

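With LabelPositions now a slice, the two-value map lookup `pos, ok := LabelPositions[l]` used by the encoders becomes a bounds check plus a nil check, as the encodeWithoutSSA hunk above shows. A compact stand-alone illustration with simplified types and hypothetical values:

package main

import "fmt"

type labelPos struct{ binaryOffset int64 }

// lookup mirrors the diff's replacement for `pos, ok := LabelPositions[l]`:
// out-of-range and nil entries both mean "no position recorded for this label".
func lookup(positions []*labelPos, l int) (*labelPos, bool) {
	if l >= len(positions) {
		return nil, false
	}
	pos := positions[l]
	return pos, pos != nil
}

func main() {
	positions := []*labelPos{0: {}, 2: {}} // index 1 left nil on purpose
	for l := 0; l < 4; l++ {
		_, ok := lookup(positions, l)
		fmt.Println(l, ok) // 0 true, 1 false, 2 true, 3 false
	}
}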
30 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go generated vendored
@@ -101,13 +101,14 @@ func (m *machine) LowerParams(args []ssa.Value) {
 			bits := arg.Type.Bits()
 			// At this point of compilation, we don't yet know how much space exist below the return address.
 			// So we instruct the address mode to add the `argStackOffset` to the offset at the later phase of compilation.
-			amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
+			amode := m.amodePool.Allocate()
+			*amode = addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
 			load := m.allocateInstr()
 			switch arg.Type {
 			case ssa.TypeI32, ssa.TypeI64:
-				load.asULoad(operandNR(reg), amode, bits)
+				load.asULoad(reg, amode, bits)
 			case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
-				load.asFpuLoad(operandNR(reg), amode, bits)
+				load.asFpuLoad(reg, amode, bits)
 			default:
 				panic("BUG")
 			}
@@ -169,7 +170,8 @@ func (m *machine) LowerReturns(rets []ssa.Value) {

 			// At this point of compilation, we don't yet know how much space exist below the return address.
 			// So we instruct the address mode to add the `retStackOffset` to the offset at the later phase of compilation.
-			amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
+			amode := m.amodePool.Allocate()
+			*amode = addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
 			store := m.allocateInstr()
 			store.asStore(operandNR(reg), amode, bits)
 			m.insert(store)
@@ -215,9 +217,9 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i
 		ldr := m.allocateInstr()
 		switch r.Type {
 		case ssa.TypeI32, ssa.TypeI64:
-			ldr.asULoad(operandNR(reg), amode, r.Type.Bits())
+			ldr.asULoad(reg, amode, r.Type.Bits())
 		case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
-			ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits())
+			ldr.asFpuLoad(reg, amode, r.Type.Bits())
 		default:
 			panic("BUG")
 		}
@@ -225,7 +227,7 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i
 	}
 }

-func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) {
+func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, *addressMode) {
 	exct := m.executableContext
 	exct.PendingInstructions = exct.PendingInstructions[:0]
 	mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse)
@@ -235,15 +237,15 @@ func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset
 	return cur, mode
 }

-func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode {
+func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) *addressMode {
 	if rn.RegType() != regalloc.RegTypeInt {
 		panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64))
 	}
-	var amode addressMode
+	amode := m.amodePool.Allocate()
 	if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) {
-		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
+		*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
 	} else if offsetFitsInAddressModeKindRegSignedImm9(offset) {
-		amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
+		*amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
 	} else {
 		var indexReg regalloc.VReg
 		if allowTmpRegUse {
@@ -253,7 +255,7 @@ func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn reg
 			indexReg = m.compiler.AllocateVReg(ssa.TypeI64)
 			m.lowerConstantI64(indexReg, offset)
 		}
-		amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}
+		*amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}
 	}
 	return amode
 }
@@ -315,7 +317,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b
 		} else {
 			ao = aluOpSub
 		}
-		alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true)
+		alu.asALU(ao, rd, operandNR(spVReg), imm12Operand, true)
 		m.insert(alu)
 	} else {
 		m.lowerConstantI64(tmpRegVReg, diff)
@@ -326,7 +328,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b
 		} else {
 			ao = aluOpSub
 		}
-		alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true)
+		alu.asALU(ao, rd, operandNR(spVReg), operandNR(tmpRegVReg), true)
 		m.insert(alu)
 	}
 }
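Two refactors repeat through the arm64 hunks above and below: addressMode values move into a pool (`m.amodePool.Allocate()` followed by `*amode = addressMode{...}`), and load destinations narrow from an operand built with operandNR to a bare regalloc.VReg. The second is sketched here with simplified stand-in types, not wazero's actual definitions:

package main

import "fmt"

// Simplified stand-ins for wazevo's types, illustrative only.
type vReg uint64

type operand struct{ r vReg }

// operandNR wraps a virtual register into the general operand type.
func operandNR(r vReg) operand { return operand{r: r} }

// Old shape: the destination arrives wrapped, although a load destination can
// only ever be a plain register.
func asULoadOld(dst operand, sizeInBits byte) {
	fmt.Println("load into", dst.r, "bits:", sizeInBits)
}

// New shape after the diff: take the register directly, so call sites drop the
// operandNR(...) wrapper and no unwrapping is needed inside.
func asULoadNew(dst vReg, sizeInBits byte) {
	fmt.Println("load into", dst, "bits:", sizeInBits)
}

func main() {
	r := vReg(3)
	asULoadOld(operandNR(r), 64)
	asULoadNew(r, 64)
}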
@@ -59,25 +59,26 @@ func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regallo
 	} else {
 		postIndexImm = 8
 	}
-	loadMode := addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm}
+	loadMode := m.amodePool.Allocate()
+	*loadMode = addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm}

 	instr := m.allocateInstr()
 	switch typ {
 	case ssa.TypeI32:
-		instr.asULoad(loadTargetReg, loadMode, 32)
+		instr.asULoad(loadTargetReg.reg(), loadMode, 32)
 	case ssa.TypeI64:
-		instr.asULoad(loadTargetReg, loadMode, 64)
+		instr.asULoad(loadTargetReg.reg(), loadMode, 64)
 	case ssa.TypeF32:
-		instr.asFpuLoad(loadTargetReg, loadMode, 32)
+		instr.asFpuLoad(loadTargetReg.reg(), loadMode, 32)
 	case ssa.TypeF64:
-		instr.asFpuLoad(loadTargetReg, loadMode, 64)
+		instr.asFpuLoad(loadTargetReg.reg(), loadMode, 64)
 	case ssa.TypeV128:
-		instr.asFpuLoad(loadTargetReg, loadMode, 128)
+		instr.asFpuLoad(loadTargetReg.reg(), loadMode, 128)
 	}
 	cur = linkInstr(cur, instr)

 	if isStackArg {
-		var storeMode addressMode
+		var storeMode *addressMode
 		cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true)
 		toStack := m.allocateInstr()
 		toStack.asStore(loadTargetReg, storeMode, bits)
@@ -113,21 +114,22 @@ func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr reg
 	}

 	if isStackArg {
-		var loadMode addressMode
+		var loadMode *addressMode
 		cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true)
 		toReg := m.allocateInstr()
 		switch typ {
 		case ssa.TypeI32, ssa.TypeI64:
-			toReg.asULoad(storeTargetReg, loadMode, bits)
+			toReg.asULoad(storeTargetReg.reg(), loadMode, bits)
 		case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
-			toReg.asFpuLoad(storeTargetReg, loadMode, bits)
+			toReg.asFpuLoad(storeTargetReg.reg(), loadMode, bits)
 		default:
 			panic("TODO?")
 		}
 		cur = linkInstr(cur, toReg)
 	}

-	mode := addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm}
+	mode := m.amodePool.Allocate()
+	*mode = addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm}
 	instr := m.allocateInstr()
 	instr.asStore(storeTargetReg, mode, bits)
 	cur = linkInstr(cur, instr)
@@ -214,11 +216,12 @@ func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction

 func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction {
 	instr := m.allocateInstr()
-	mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()}
+	mode := m.amodePool.Allocate()
+	*mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()}
 	if store {
 		instr.asStore(operandNR(d), mode, 64)
 	} else {
-		instr.asULoad(operandNR(d), mode, 64)
+		instr.asULoad(d, mode, 64)
 	}
 	return linkInstr(prev, instr)
 }
119 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go generated vendored
@@ -87,7 +87,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
 		// Module context is always the second argument.
 		moduleCtrPtr := x1VReg
 		store := m.allocateInstr()
-		amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset}
+		amode := m.amodePool.Allocate()
+		*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset}
 		store.asStore(operandNR(moduleCtrPtr), amode, 64)
 		cur = linkInstr(cur, store)
 	}
@@ -120,11 +121,9 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
 			} else {
 				sizeInBits = 64
 			}
-			store.asStore(operandNR(v),
-				addressMode{
-					kind: addressModeKindPostIndex,
-					rn:   arg0ret0AddrReg, imm: int64(sizeInBits / 8),
-				}, sizeInBits)
+			amode := m.amodePool.Allocate()
+			*amode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8)}
+			store.asStore(operandNR(v), amode, sizeInBits)
 			cur = linkInstr(cur, store)
 		}

@@ -139,7 +138,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
 		frameSizeReg = xzrVReg
 		sliceSizeReg = xzrVReg
 	}
-	_amode := addressModePreOrPostIndex(spVReg, -16, true)
+	_amode := addressModePreOrPostIndex(m, spVReg, -16, true)
 	storeP := m.allocateInstr()
 	storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode)
 	cur = linkInstr(cur, storeP)
@@ -165,8 +164,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
 	cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true)
 	ldr := m.allocateInstr()
 	// And load the return address.
-	ldr.asULoad(operandNR(lrVReg),
-		addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
+	amode := addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */)
+	ldr.asULoad(lrVReg, amode, 64)
 	cur = linkInstr(cur, ldr)

 	originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want.
@@ -183,23 +182,24 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
 		r := &abi.Rets[i]
 		if r.Kind == backend.ABIArgKindReg {
 			loadIntoReg := m.allocateInstr()
-			mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
+			mode := m.amodePool.Allocate()
+			*mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
 			switch r.Type {
 			case ssa.TypeI32:
 				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
-				loadIntoReg.asULoad(operandNR(r.Reg), mode, 32)
+				loadIntoReg.asULoad(r.Reg, mode, 32)
 			case ssa.TypeI64:
 				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
-				loadIntoReg.asULoad(operandNR(r.Reg), mode, 64)
+				loadIntoReg.asULoad(r.Reg, mode, 64)
 			case ssa.TypeF32:
 				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
-				loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32)
+				loadIntoReg.asFpuLoad(r.Reg, mode, 32)
 			case ssa.TypeF64:
 				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
-				loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64)
+				loadIntoReg.asFpuLoad(r.Reg, mode, 64)
 			case ssa.TypeV128:
 				mode.imm = 16
-				loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128)
+				loadIntoReg.asFpuLoad(r.Reg, mode, 128)
 			default:
 				panic("TODO")
 			}
@@ -208,28 +208,29 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
 			// First we need to load the value to a temporary just like ^^.
 			intTmp, floatTmp := x11VReg, v11VReg
 			loadIntoTmpReg := m.allocateInstr()
-			mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
+			mode := m.amodePool.Allocate()
+			*mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
 			var resultReg regalloc.VReg
 			switch r.Type {
 			case ssa.TypeI32:
 				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
-				loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32)
+				loadIntoTmpReg.asULoad(intTmp, mode, 32)
 				resultReg = intTmp
 			case ssa.TypeI64:
 				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
-				loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64)
+				loadIntoTmpReg.asULoad(intTmp, mode, 64)
 				resultReg = intTmp
 			case ssa.TypeF32:
 				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
-				loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32)
+				loadIntoTmpReg.asFpuLoad(floatTmp, mode, 32)
 				resultReg = floatTmp
 			case ssa.TypeF64:
 				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
-				loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64)
+				loadIntoTmpReg.asFpuLoad(floatTmp, mode, 64)
 				resultReg = floatTmp
 			case ssa.TypeV128:
 				mode.imm = 16
-				loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128)
+				loadIntoTmpReg.asFpuLoad(floatTmp, mode, 128)
 				resultReg = floatTmp
 			default:
 				panic("TODO")
@@ -258,12 +259,13 @@ func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regal
 		case regalloc.RegTypeFloat:
 			sizeInBits = 128
 		}
-		store.asStore(operandNR(v),
-			addressMode{
-				kind: addressModeKindRegUnsignedImm12,
-				// Execution context is always the first argument.
-				rn: x0VReg, imm: offset,
-			}, sizeInBits)
+		mode := m.amodePool.Allocate()
+		*mode = addressMode{
+			kind: addressModeKindRegUnsignedImm12,
+			// Execution context is always the first argument.
+			rn: x0VReg, imm: offset,
+		}
+		store.asStore(operandNR(v), mode, sizeInBits)
 		store.prev = cur
 		cur.next = store
 		cur = store
@@ -276,7 +278,7 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re
 	offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
 	for _, v := range regs {
 		load := m.allocateInstr()
-		var as func(dst operand, amode addressMode, sizeInBits byte)
+		var as func(dst regalloc.VReg, amode *addressMode, sizeInBits byte)
 		var sizeInBits byte
 		switch v.RegType() {
 		case regalloc.RegTypeInt:
@@ -286,12 +288,13 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re
 			as = load.asFpuLoad
 			sizeInBits = 128
 		}
-		as(operandNR(v),
-			addressMode{
-				kind: addressModeKindRegUnsignedImm12,
-				// Execution context is always the first argument.
-				rn: x0VReg, imm: offset,
-			}, sizeInBits)
+		mode := m.amodePool.Allocate()
+		*mode = addressMode{
+			kind: addressModeKindRegUnsignedImm12,
+			// Execution context is always the first argument.
+			rn: x0VReg, imm: offset,
+		}
+		as(v, mode, sizeInBits)
 		cur = linkInstr(cur, load)
 		offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally load regs at the offset of multiple of 16.
 	}
@@ -324,11 +327,9 @@ func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode

 	// Set the exit status on the execution context.
 	setExistStatus := m.allocateInstr()
-	setExistStatus.asStore(operandNR(constReg),
-		addressMode{
-			kind: addressModeKindRegUnsignedImm12,
-			rn:   execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
-		}, 32)
+	mode := m.amodePool.Allocate()
+	*mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64()}
+	setExistStatus.asStore(operandNR(constReg), mode, 32)
 	cur = linkInstr(cur, setExistStatus)
 	return cur
 }
@@ -340,12 +341,13 @@ func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction {
 	cur = linkInstr(cur, adr)

 	storeReturnAddr := m.allocateInstr()
-	storeReturnAddr.asStore(operandNR(tmpRegVReg),
-		addressMode{
-			kind: addressModeKindRegUnsignedImm12,
-			// Execution context is always the first argument.
-			rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
-		}, 64)
+	mode := m.amodePool.Allocate()
+	*mode = addressMode{
+		kind: addressModeKindRegUnsignedImm12,
+		// Execution context is always the first argument.
+		rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
+	}
+	storeReturnAddr.asStore(operandNR(tmpRegVReg), mode, 64)
 	cur = linkInstr(cur, storeReturnAddr)

 	// Exit the execution.
@@ -364,11 +366,12 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe
 	cur = linkInstr(cur, movSp)

 	strSp := m.allocateInstr()
-	strSp.asStore(operandNR(tmpRegVReg),
-		addressMode{
-			kind: addressModeKindRegUnsignedImm12,
-			rn:   execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
-		}, 64)
+	mode := m.amodePool.Allocate()
+	*mode = addressMode{
+		kind: addressModeKindRegUnsignedImm12,
+		rn:   execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
+	}
+	strSp.asStore(operandNR(tmpRegVReg), mode, 64)
 	cur = linkInstr(cur, strSp)
 	return cur
 }
@@ -376,27 +379,28 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe
 func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) {
 	load := m.allocateInstr()
 	var result regalloc.VReg
-	mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg}
+	mode := m.amodePool.Allocate()
+	*mode = addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg}
 	switch arg.Type {
 	case ssa.TypeI32:
 		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
-		load.asULoad(operandNR(intVReg), mode, 32)
+		load.asULoad(intVReg, mode, 32)
 		result = intVReg
 	case ssa.TypeI64:
 		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
-		load.asULoad(operandNR(intVReg), mode, 64)
+		load.asULoad(intVReg, mode, 64)
 		result = intVReg
 	case ssa.TypeF32:
 		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
-		load.asFpuLoad(operandNR(floatVReg), mode, 32)
+		load.asFpuLoad(floatVReg, mode, 32)
 		result = floatVReg
 	case ssa.TypeF64:
 		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
-		load.asFpuLoad(operandNR(floatVReg), mode, 64)
+		load.asFpuLoad(floatVReg, mode, 64)
 		result = floatVReg
 	case ssa.TypeV128:
 		mode.imm = 16
-		load.asFpuLoad(operandNR(floatVReg), mode, 128)
+		load.asFpuLoad(floatVReg, mode, 128)
 		result = floatVReg
 	default:
 		panic("TODO")
@@ -408,7 +412,8 @@ func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg r

 func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction {
 	store := m.allocateInstr()
-	mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg}
+	mode := m.amodePool.Allocate()
+	*mode = addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg}
 	var sizeInBits byte
 	switch result.Type {
 	case ssa.TypeI32, ssa.TypeF32:
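Nearly every hunk in this file swaps a stack-allocated addressMode literal for one taken from m.amodePool, so address modes can be handed around as pointers without a fresh heap allocation per instruction. Below is a minimal sketch of such a pool; it is a simplified stand-in for wazevoapi.Pool, whose real implementation may differ:

package main

import "fmt"

// pool is a simplified stand-in for wazevoapi.Pool: it hands out recycled
// values and reclaims all of them at once on Reset.
type pool[T any] struct {
	items []*T      // every value ever allocated, reused across Resets
	next  int       // index of the next value to hand out
	reset func(*T)  // zeroes a value before it is reused
}

func newPool[T any](reset func(*T)) pool[T] { return pool[T]{reset: reset} }

func (p *pool[T]) Allocate() *T {
	if p.next == len(p.items) {
		p.items = append(p.items, new(T))
	}
	v := p.items[p.next]
	p.next++
	p.reset(v)
	return v
}

// Reset makes every previously allocated value available again.
func (p *pool[T]) Reset() { p.next = 0 }

type addressMode struct{ kind, imm int64 }

func main() {
	p := newPool[addressMode](func(a *addressMode) { *a = addressMode{} })
	amode := p.Allocate()
	*amode = addressMode{kind: 1, imm: 16} // same shape as the diff's `*amode = addressMode{...}`
	fmt.Println(*amode)
	p.Reset()                  // end of one compilation
	fmt.Println(*p.Allocate()) // {0 0}: the same object, zeroed for reuse
}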
438 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go generated vendored
File diff suppressed because it is too large
@@ -44,12 +44,12 @@ func (i *instruction) encode(m *machine) {
 	case callInd:
 		c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true))
 	case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128:
-		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode))
+		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], *i.getAmode()))
 	case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128:
-		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode))
+		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.RealReg()], *i.getAmode()))
 	case vecLoad1R:
 		c.Emit4Bytes(encodeVecLoad1R(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(i.u1)))
 	case condBr:
@@ -75,22 +75,22 @@ func (i *instruction) encode(m *machine) {
 			panic("BUG")
 		}
 	case movN:
-		c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
+		c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))
 	case movZ:
-		c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
+		c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))
 	case movK:
-		c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
+		c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))
 	case mov32:
-		to, from := i.rd.realReg(), i.rn.realReg()
+		to, from := i.rd.RealReg(), i.rn.realReg()
 		c.Emit4Bytes(encodeAsMov32(regNumberInEncoding[from], regNumberInEncoding[to]))
 	case mov64:
-		to, from := i.rd.realReg(), i.rn.realReg()
+		to, from := i.rd.RealReg(), i.rn.realReg()
 		toIsSp := to == sp
 		fromIsSp := from == sp
 		c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp))
 	case loadP64, storeP64:
 		rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()]
-		amode := i.amode
+		amode := i.getAmode()
 		rn := regNumberInEncoding[amode.rn.RealReg()]
 		var pre bool
 		switch amode.kind {
@@ -102,21 +102,21 @@ func (i *instruction) encode(m *machine) {
 		}
 		c.Emit4Bytes(encodePreOrPostIndexLoadStorePair64(pre, kind == loadP64, rn, rt, rt2, amode.imm))
 	case loadFpuConst32:
-		rd := regNumberInEncoding[i.rd.realReg()]
+		rd := regNumberInEncoding[i.rd.RealReg()]
 		if i.u1 == 0 {
 			c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B))
 		} else {
 			encodeLoadFpuConst32(c, rd, i.u1)
 		}
 	case loadFpuConst64:
-		rd := regNumberInEncoding[i.rd.realReg()]
+		rd := regNumberInEncoding[i.rd.RealReg()]
 		if i.u1 == 0 {
 			c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B))
 		} else {
-			encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.realReg()], i.u1)
+			encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.RealReg()], i.u1)
 		}
 	case loadFpuConst128:
-		rd := regNumberInEncoding[i.rd.realReg()]
+		rd := regNumberInEncoding[i.rd.RealReg()]
 		lo, hi := i.u1, i.u2
 		if lo == 0 && hi == 0 {
 			c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement16B))
@@ -126,35 +126,35 @@ func (i *instruction) encode(m *machine) {
 	case aluRRRR:
 		c.Emit4Bytes(encodeAluRRRR(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
-			regNumberInEncoding[i.ra.realReg()],
-			uint32(i.u3),
+			regNumberInEncoding[regalloc.VReg(i.u2).RealReg()],
+			uint32(i.u1>>32),
 		))
 	case aluRRImmShift:
 		c.Emit4Bytes(encodeAluRRImm(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			uint32(i.rm.shiftImm()),
-			uint32(i.u3),
+			uint32(i.u2>>32),
 		))
 	case aluRRR:
 		rn := i.rn.realReg()
 		c.Emit4Bytes(encodeAluRRR(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[rn],
 			regNumberInEncoding[i.rm.realReg()],
-			i.u3 == 1,
+			i.u2>>32 == 1,
 			rn == sp,
 		))
 	case aluRRRExtend:
 		rm, exo, to := i.rm.er()
 		c.Emit4Bytes(encodeAluRRRExtend(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[rm.RealReg()],
 			exo,
@@ -164,25 +164,25 @@ func (i *instruction) encode(m *machine) {
 		r, amt, sop := i.rm.sr()
 		c.Emit4Bytes(encodeAluRRRShift(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[r.RealReg()],
 			uint32(amt),
 			sop,
-			i.u3 == 1,
+			i.u2>>32 == 1,
 		))
 	case aluRRBitmaskImm:
 		c.Emit4Bytes(encodeAluBitmaskImmediate(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			i.u2,
-			i.u3 == 1,
+			i.u1>>32 == 1,
 		))
 	case bitRR:
 		c.Emit4Bytes(encodeBitRR(
 			bitOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			uint32(i.u2)),
 		)
@@ -190,22 +190,22 @@ func (i *instruction) encode(m *machine) {
 		imm12, shift := i.rm.imm12()
 		c.Emit4Bytes(encodeAluRRImm12(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			imm12, shift,
-			i.u3 == 1,
+			i.u2>>32 == 1,
 		))
 	case fpuRRR:
 		c.Emit4Bytes(encodeFpuRRR(
 			fpuBinOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
-			i.u3 == 1,
+			i.u2 == 1,
 		))
 	case fpuMov64, fpuMov128:
 		// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MOV--vector---Move-vector--an-alias-of-ORR--vector--register--
-		rd := regNumberInEncoding[i.rd.realReg()]
+		rd := regNumberInEncoding[i.rd.RealReg()]
 		rn := regNumberInEncoding[i.rn.realReg()]
 		var q uint32
 		if kind == fpuMov128 {
@@ -213,7 +213,7 @@ func (i *instruction) encode(m *machine) {
 		}
 		c.Emit4Bytes(q<<30 | 0b1110101<<21 | rn<<16 | 0b000111<<10 | rn<<5 | rd)
 	case cSet:
-		rd := regNumberInEncoding[i.rd.realReg()]
+		rd := regNumberInEncoding[i.rd.RealReg()]
 		cf := condFlag(i.u1)
 		if i.u2 == 1 {
 			// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/CSETM--Conditional-Set-Mask--an-alias-of-CSINV-
@@ -225,12 +225,12 @@ func (i *instruction) encode(m *machine) {
 			c.Emit4Bytes(0b1001101010011111<<16 | uint32(cf.invert())<<12 | 0b111111<<5 | rd)
 		}
 	case extend:
-		c.Emit4Bytes(encodeExtend(i.u3 == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.realReg()], regNumberInEncoding[i.rn.realReg()]))
+		c.Emit4Bytes(encodeExtend((i.u2>>32) == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()]))
 	case fpuCmp:
 		// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/FCMP--Floating-point-quiet-Compare--scalar--?lang=en
 		rn, rm := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()]
 		var ftype uint32
-		if i.u3 == 1 {
+		if i.u1 == 1 {
 			ftype = 0b01 // double precision.
 		}
 		c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5)
@@ -242,34 +242,34 @@ func (i *instruction) encode(m *machine) {
 			c.Emit4Bytes(0)
 		}
 	case adr:
-		c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1)))
+		c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.RealReg()], uint32(i.u1)))
 	case cSel:
 		c.Emit4Bytes(encodeConditionalSelect(
 			kind,
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			condFlag(i.u1),
-			i.u3 == 1,
+			i.u2 == 1,
 		))
 	case fpuCSel:
 		c.Emit4Bytes(encodeFpuCSel(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			condFlag(i.u1),
-			i.u3 == 1,
+			i.u2 == 1,
 		))
 	case movToVec:
 		c.Emit4Bytes(encodeMoveToVec(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(byte(i.u1)),
 			vecIndex(i.u2),
 		))
 	case movFromVec, movFromVecSigned:
 		c.Emit4Bytes(encodeMoveFromVec(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(byte(i.u1)),
 			vecIndex(i.u2),
@@ -277,18 +277,18 @@ func (i *instruction) encode(m *machine) {
 		))
 	case vecDup:
 		c.Emit4Bytes(encodeVecDup(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(byte(i.u1))))
 	case vecDupElement:
 		c.Emit4Bytes(encodeVecDupElement(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(byte(i.u1)),
 			vecIndex(i.u2)))
 	case vecExtract:
 		c.Emit4Bytes(encodeVecExtract(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			vecArrangement(byte(i.u1)),
@@ -296,35 +296,35 @@ func (i *instruction) encode(m *machine) {
 	case vecPermute:
 		c.Emit4Bytes(encodeVecPermute(
 			vecOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			vecArrangement(byte(i.u2))))
 	case vecMovElement:
 		c.Emit4Bytes(encodeVecMovElement(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(i.u1),
-			uint32(i.u2), uint32(i.u3),
+			uint32(i.u2), uint32(i.u2>>32),
 		))
 	case vecMisc:
 		c.Emit4Bytes(encodeAdvancedSIMDTwoMisc(
 			vecOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(i.u2),
 		))
 	case vecLanes:
 		c.Emit4Bytes(encodeVecLanes(
 			vecOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(i.u2),
 		))
 	case vecShiftImm:
 		c.Emit4Bytes(encodeVecShiftImm(
 			vecOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			uint32(i.rm.shiftImm()),
 			vecArrangement(i.u2),
@@ -332,7 +332,7 @@ func (i *instruction) encode(m *machine) {
 	case vecTbl:
 		c.Emit4Bytes(encodeVecTbl(
 			1,
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			vecArrangement(i.u2)),
@@ -340,7 +340,7 @@ func (i *instruction) encode(m *machine) {
 	case vecTbl2:
 		c.Emit4Bytes(encodeVecTbl(
 			2,
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			vecArrangement(i.u2)),
@@ -353,9 +353,9 @@ func (i *instruction) encode(m *machine) {
 	case fpuRR:
 		c.Emit4Bytes(encodeFloatDataOneSource(
 			fpuUniOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
-			i.u3 == 1,
+			i.u2 == 1,
 		))
 	case vecRRR:
 		if op := vecOp(i.u1); op == vecOpBsl || op == vecOpBit || op == vecOpUmlal {
@@ -365,14 +365,14 @@ func (i *instruction) encode(m *machine) {
 	case vecRRRRewrite:
 		c.Emit4Bytes(encodeVecRRR(
 			vecOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			vecArrangement(i.u2),
 		))
 	case cCmpImm:
 		// Conditional compare (immediate) in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en
-		sf := uint32(i.u3 & 0b1)
+		sf := uint32((i.u2 >> 32) & 0b1)
 		nzcv := uint32(i.u2 & 0b1111)
 		cond := uint32(condFlag(i.u1))
 		imm := uint32(i.rm.data & 0b11111)
@@ -381,7 +381,7 @@ func (i *instruction) encode(m *machine) {
 			sf<<31 | 0b111101001<<22 | imm<<16 | cond<<12 | 0b1<<11 | rn<<5 | nzcv,
 		)
 	case movFromFPSR:
-		rt := regNumberInEncoding[i.rd.realReg()]
+		rt := regNumberInEncoding[i.rd.RealReg()]
 		c.Emit4Bytes(encodeSystemRegisterMove(rt, true))
 	case movToFPSR:
 		rt := regNumberInEncoding[i.rn.realReg()]
@@ -390,13 +390,13 @@ func (i *instruction) encode(m *machine) {
 		c.Emit4Bytes(encodeAtomicRmw(
 			atomicRmwOp(i.u1),
 			regNumberInEncoding[i.rm.realReg()],
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			uint32(i.u2),
 		))
 	case atomicCas:
 		c.Emit4Bytes(encodeAtomicCas(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			uint32(i.u2),
@@ -404,7 +404,7 @@ func (i *instruction) encode(m *machine) {
 	case atomicLoad:
 		c.Emit4Bytes(encodeAtomicLoadStore(
 			regNumberInEncoding[i.rn.realReg()],
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			uint32(i.u2),
 			1,
 		))
@@ -810,7 +810,7 @@ func encodeFloatDataOneSource(op fpuUniOp, rd, rn uint32, dst64bit bool) uint32
 // encodeCnvBetweenFloatInt encodes as "Conversion between floating-point and integer" in
 // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
 func encodeCnvBetweenFloatInt(i *instruction) uint32 {
-	rd := regNumberInEncoding[i.rd.realReg()]
+	rd := regNumberInEncoding[i.rd.RealReg()]
 	rn := regNumberInEncoding[i.rn.realReg()]

 	var opcode uint32
@@ -822,8 +822,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 {
 		rmode = 0b00

 		signed := i.u1 == 1
-		src64bit := i.u2 == 1
-		dst64bit := i.u3 == 1
+		src64bit := i.u2&1 != 0
+		dst64bit := i.u2&2 != 0
 		if signed {
 			opcode = 0b010
 		} else {
@@ -841,8 +841,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 {
 		rmode = 0b11

 		signed := i.u1 == 1
-		src64bit := i.u2 == 1
-		dst64bit := i.u3 == 1
+		src64bit := i.u2&1 != 0
+		dst64bit := i.u2&2 != 0

 		if signed {
 			opcode = 0b000
@@ -1787,13 +1787,13 @@ func encodeCBZCBNZ(rt uint32, nz bool, imm19 uint32, _64bit bool) (ret uint32) {
 // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
 //
 // "shift" must have been divided by 16 at this point.
-func encodeMoveWideImmediate(opc uint32, rd uint32, imm, shift, _64bit uint64) (ret uint32) {
+func encodeMoveWideImmediate(opc uint32, rd uint32, imm uint64, shift, _64bit uint32) (ret uint32) {
 	ret = rd
 	ret |= uint32(imm&0xffff) << 5
-	ret |= (uint32(shift)) << 21
+	ret |= (shift) << 21
 	ret |= 0b100101 << 23
 	ret |= opc << 29
-	ret |= uint32(_64bit) << 31
+	ret |= _64bit << 31
 	return
 }

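Many of the encode() changes stem from the instruction struct dropping its separate u3 field: the value that lived in u3 is now packed into the upper 32 bits of u2 (or, for aluRRRR and aluRRBitmaskImm, of u1). A tiny illustration of the packing convention, with made-up field values:

package main

import "fmt"

func main() {
	// Pack what used to be two separate fields (u2 and u3) into one uint64:
	// the low 32 bits keep the old u2, the high 32 bits carry the old u3.
	oldU2, oldU3 := uint32(5), uint32(1) // hypothetical field values
	u2 := uint64(oldU2) | uint64(oldU3)<<32

	// Readers then use the expressions seen throughout the encode hunks:
	fmt.Println(uint32(u2))       // 5 -- the old u2
	fmt.Println(uint32(u2 >> 32)) // 1 -- the old u3
	fmt.Println(u2>>32 == 1)      // true -- the old `i.u3 == 1` flag test

	// Two booleans can also share the low bits, as in encodeCnvBetweenFloatInt:
	src64bit, dst64bit := u2&1 != 0, u2&2 != 0
	fmt.Println(src64bit, dst64bit) // true false (5 = 0b101)
}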
@@ -284,18 +284,18 @@ func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {

 func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
 	instr := m.allocateInstr()
-	instr.asMOVZ(dst, v, uint64(shift), dst64)
+	instr.asMOVZ(dst, v, uint32(shift), dst64)
 	m.insert(instr)
 }

 func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
 	instr := m.allocateInstr()
-	instr.asMOVK(dst, v, uint64(shift), dst64)
+	instr.asMOVK(dst, v, uint32(shift), dst64)
 	m.insert(instr)
 }

 func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
 	instr := m.allocateInstr()
-	instr.asMOVN(dst, v, uint64(shift), dst64)
+	instr.asMOVN(dst, v, uint32(shift), dst64)
 	m.insert(instr)
 }
379 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go generated vendored
File diff suppressed because it is too large
@@ -24,6 +24,14 @@ type (
 	addressModeKind byte
 )

+func resetAddressMode(a *addressMode) {
+	a.kind = 0
+	a.rn = 0
+	a.rm = 0
+	a.extOp = 0
+	a.imm = 0
+}
+
 const (
 	// addressModeKindRegExtended takes a base register and an index register. The index register is sign/zero-extended,
 	// and then scaled by bits(type)/8.
@@ -140,15 +148,17 @@ func (a addressMode) format(dstSizeBits byte) (ret string) {
 	return
 }

-func addressModePreOrPostIndex(rn regalloc.VReg, imm int64, preIndex bool) addressMode {
+func addressModePreOrPostIndex(m *machine, rn regalloc.VReg, imm int64, preIndex bool) *addressMode {
 	if !offsetFitsInAddressModeKindRegSignedImm9(imm) {
 		panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm))
 	}
+	mode := m.amodePool.Allocate()
 	if preIndex {
-		return addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
+		*mode = addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
 	} else {
-		return addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
+		*mode = addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
 	}
+	return mode
 }

 func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool {
@@ -207,9 +217,9 @@ func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret
 	amode := m.lowerToAddressMode(ptr, offset, size)
 	load := m.allocateInstr()
 	if signed {
-		load.asSLoad(operandNR(ret), amode, size)
+		load.asSLoad(ret, amode, size)
 	} else {
-		load.asULoad(operandNR(ret), amode, size)
+		load.asULoad(ret, amode, size)
 	}
 	m.insert(load)
 }
@@ -221,11 +231,11 @@ func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa.
 	load := m.allocateInstr()
 	switch typ {
 	case ssa.TypeI32, ssa.TypeI64:
-		load.asULoad(operandNR(dst), amode, typ.Bits())
+		load.asULoad(dst, amode, typ.Bits())
 	case ssa.TypeF32, ssa.TypeF64:
-		load.asFpuLoad(operandNR(dst), amode, typ.Bits())
+		load.asFpuLoad(dst, amode, typ.Bits())
 	case ssa.TypeV128:
-		load.asFpuLoad(operandNR(dst), amode, 128)
+		load.asFpuLoad(dst, amode, 128)
 	default:
 		panic("TODO")
 	}
@@ -239,7 +249,7 @@ func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane,
 	m.lowerConstantI64(offsetReg, int64(offset))
 	addedBase := m.addReg64ToReg64(base, offsetReg)

-	rd := operandNR(m.compiler.VRegOf(ret))
+	rd := m.compiler.VRegOf(ret)

 	ld1r := m.allocateInstr()
 	ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane))
@@ -258,7 +268,7 @@ func (m *machine) lowerStore(si *ssa.Instruction) {
 }

 // lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
-func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode addressMode) {
+func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode *addressMode) {
 	// TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and
 	// addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed
 	// to support more efficient address resolution.
@@ -272,32 +282,33 @@ func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte
 // During the construction, this might emit additional instructions.
 //
 // Extracted as a separate function for easy testing.
-func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode addressMode) {
+func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode *addressMode) {
+	amode = m.amodePool.Allocate()
 	switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); {
 	case a64sExist && a32sExist:
 		var base regalloc.VReg
 		base = a64s.Dequeue()
 		var a32 addend32
 		a32 = a32s.Dequeue()
-		amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
+		*amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
 	case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset):
 		var base regalloc.VReg
 		base = a64s.Dequeue()
-		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
+		*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
 		offset = 0
 	case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset):
 		var base regalloc.VReg
 		base = a64s.Dequeue()
-		amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
+		*amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
 		offset = 0
 	case a64sExist:
 		var base regalloc.VReg
 		base = a64s.Dequeue()
 		if !a64s.Empty() {
 			index := a64s.Dequeue()
-			amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
+			*amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
 		} else {
-			amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
+			*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
 		}
 	case a32sExist:
 		base32 := a32s.Dequeue()
@@ -314,14 +325,14 @@ func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32],

 		if !a32s.Empty() {
 			index := a32s.Dequeue()
-			amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
+			*amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
 		} else {
-			amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
+			*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
 		}
 	default: // Only static offsets.
 		tmpReg := m.compiler.AllocateVReg(ssa.TypeI64)
 		m.lowerConstantI64(tmpReg, offset)
-		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0}
+		*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0}
 		offset = 0
 	}

@@ -411,13 +422,13 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
 	rd = m.compiler.AllocateVReg(ssa.TypeI64)
 	alu := m.allocateInstr()
 	if imm12Op, ok := asImm12Operand(uint64(c)); ok {
-		alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), imm12Op, true)
+		alu.asALU(aluOpAdd, rd, operandNR(r), imm12Op, true)
 	} else if imm12Op, ok = asImm12Operand(uint64(-c)); ok {
-		alu.asALU(aluOpSub, operandNR(rd), operandNR(r), imm12Op, true)
+		alu.asALU(aluOpSub, rd, operandNR(r), imm12Op, true)
 	} else {
 		tmp := m.compiler.AllocateVReg(ssa.TypeI64)
 		m.load64bitConst(c, tmp)
-		alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), operandNR(tmp), true)
+		alu.asALU(aluOpAdd, rd, operandNR(r), operandNR(tmp), true)
 	}
 	m.insert(alu)
 	return
@@ -426,7 +437,7 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
 func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {
 	rd = m.compiler.AllocateVReg(ssa.TypeI64)
 	alu := m.allocateInstr()
-	alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true)
+	alu.asALU(aluOpAdd, rd, operandNR(rn), operandNR(rm), true)
 	m.insert(alu)
 	return
 }
@@ -434,7 +445,7 @@ func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {
 func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) {
 	rd = m.compiler.AllocateVReg(ssa.TypeI64)
 	alu := m.allocateInstr()
-	alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandER(rm, ext, 64), true)
+	alu.asALU(aluOpAdd, rd, operandNR(rn), operandER(rm, ext, 64), true)
 	m.insert(alu)
 	return
 }
17 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go generated vendored
@@ -21,6 +21,8 @@ type (
 		regAlloc   regalloc.Allocator
 		regAllocFn *backend.RegAllocFunction[*instruction, *machine]

+		amodePool wazevoapi.Pool[addressMode]
+
 		// addendsWorkQueue is used during address lowering, defined here for reuse.
 		addendsWorkQueue wazevoapi.Queue[ssa.Value]
 		addends32        wazevoapi.Queue[addend32]
@@ -105,6 +107,7 @@ func NewBackend() backend.Machine {
 		spillSlots:        make(map[regalloc.VRegID]int64),
 		executableContext: newExecutableContext(),
 		regAlloc:          regalloc.NewAllocator(regInfo),
+		amodePool:         wazevoapi.NewPool[addressMode](resetAddressMode),
 	}
 	return m
 }
@@ -149,6 +152,7 @@ func (m *machine) Reset() {
 	m.maxRequiredStackSizeForCalls = 0
 	m.executableContext.Reset()
 	m.jmpTableTargets = m.jmpTableTargets[:0]
+	m.amodePool.Reset()
 }

 // SetCurrentABI implements backend.Machine SetCurrentABI.
@@ -183,9 +187,8 @@ func (m *machine) allocateBrTarget() (nop *instruction, l label) {
 	l = ectx.AllocateLabel()
 	nop = m.allocateInstr()
 	nop.asNop0WithLabel(l)
-	pos := ectx.AllocateLabelPosition(l)
+	pos := ectx.GetOrAllocateLabelPosition(l)
 	pos.Begin, pos.End = nop, nop
-	ectx.LabelPositions[l] = pos
 	return
 }

@@ -209,7 +212,7 @@ func (m *machine) allocateNop() *instruction {
 }

 func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) {
-	amode := &i.amode
+	amode := i.getAmode()
 	switch amode.kind {
 	case addressModeKindResultStackSpace:
 		amode.imm += ret0offset
@@ -281,7 +284,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {
 			switch cur.kind {
 			case nop0:
 				l := cur.nop0Label()
-				if pos, ok := ectx.LabelPositions[l]; ok {
+				if pos := ectx.LabelPositions[l]; pos != nil {
 					pos.BinaryOffset = offset + size
 				}
 			case condBr:
@@ -428,8 +431,10 @@ func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *
 func (m *machine) Format() string {
 	ectx := m.executableContext
 	begins := map[*instruction]label{}
-	for l, pos := range ectx.LabelPositions {
-		begins[pos.Begin] = l
+	for _, pos := range ectx.LabelPositions {
+		if pos != nil {
+			begins[pos.Begin] = pos.L
+		}
 	}

 	irBlocks := map[label]ssa.BasicBlockID{}
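The Reset hunk above adds m.amodePool.Reset() to the machine's per-function cleanup, so every pooled addressMode from the previous compilation becomes reusable. A sketch of that cascade with illustrative names only, not wazero's actual types:

package main

import "fmt"

// amodePool stands in for the pool added to the machine; Reset recycles
// everything it handed out.
type amodePool struct{ next int }

func (p *amodePool) Reset() { p.next = 0 }

type machine struct {
	amodePool       amodePool
	jmpTableTargets [][]uint32
}

// Reset recycles everything the machine allocated for the previous function:
// slices are truncated in place to keep their capacity, and the new amodePool
// joins the cascade so pooled addressModes are reused as well.
func (m *machine) Reset() {
	m.jmpTableTargets = m.jmpTableTargets[:0]
	m.amodePool.Reset()
}

func main() {
	m := &machine{jmpTableTargets: [][]uint32{{1, 2}}, amodePool: amodePool{next: 3}}
	m.Reset()
	fmt.Println(len(m.jmpTableTargets), cap(m.jmpTableTargets), m.amodePool.next) // 0 1 0
}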
@ -70,7 +70,7 @@ func (m *machine) setupPrologue() {
|
||||
// +-----------------+ <----- SP
|
||||
// (low address)
|
||||
//
|
||||
_amode := addressModePreOrPostIndex(spVReg,
|
||||
_amode := addressModePreOrPostIndex(m, spVReg,
|
||||
-16, // stack pointer must be 16-byte aligned.
|
||||
true, // Decrement before store.
|
||||
)
|
||||
@ -159,7 +159,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc
sizeOfArgRetReg = tmpRegVReg

subSp := m.allocateInstr()
subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true)
subSp.asALU(aluOpSub, spVReg, operandNR(spVReg), operandNR(sizeOfArgRetReg), true)
cur = linkInstr(cur, subSp)
} else {
sizeOfArgRetReg = xzrVReg
@ -168,7 +168,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc
// Saves the return address (lr) and the size_of_arg_ret below the SP.
// size_of_arg_ret is used for stack unwinding.
pstr := m.allocateInstr()
amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */)
amode := addressModePreOrPostIndex(m, spVReg, -16, true /* decrement before store */)
pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode)
cur = linkInstr(cur, pstr)
return cur
@ -182,7 +182,7 @@ func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction {
} else {
frameSizeReg = xzrVReg
}
_amode := addressModePreOrPostIndex(spVReg,
_amode := addressModePreOrPostIndex(m, spVReg,
-16, // stack pointer must be 16-byte aligned.
true, // Decrement before store.
)
@ -213,7 +213,7 @@ func (m *machine) postRegAlloc() {
m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0]
default:
// Removes the redundant copy instruction.
if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() {
if cur.IsCopy() && cur.rn.realReg() == cur.rd.RealReg() {
prev, next := cur.prev, cur.next
// Remove the copy instruction.
prev.next = next
@ -286,16 +286,16 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
for i := range m.clobberedRegs {
vr := m.clobberedRegs[l-i] // reverse order to restore.
load := m.allocateInstr()
amode := addressModePreOrPostIndex(spVReg,
amode := addressModePreOrPostIndex(m, spVReg,
16, // stack pointer must be 16-byte aligned.
false, // Increment after store.
)
// TODO: pair loads to reduce the number of instructions.
switch regTypeToRegisterSizeInBits(vr.RegType()) {
case 64: // save int reg.
load.asULoad(operandNR(vr), amode, 64)
load.asULoad(vr, amode, 64)
case 128: // save vector reg.
load.asFpuLoad(operandNR(vr), amode, 128)
load.asFpuLoad(vr, amode, 128)
}
cur = linkInstr(cur, load)
}
@ -317,8 +317,8 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
// SP----> +-----------------+

ldr := m.allocateInstr()
ldr.asULoad(operandNR(lrVReg),
addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
ldr.asULoad(lrVReg,
addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
cur = linkInstr(cur, ldr)

if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 {
@ -351,14 +351,14 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi
if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok {
// sub tmp, sp, #requiredStackSize
sub := m.allocateInstr()
sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), immm12op, true)
sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), immm12op, true)
cur = linkInstr(cur, sub)
} else {
// This case, we first load the requiredStackSize into the temporary register,
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
// Then subtract it.
sub := m.allocateInstr()
sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true)
sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), operandNR(tmpRegVReg), true)
cur = linkInstr(cur, sub)
}

@ -366,16 +366,18 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi

// ldr tmp2, [executionContext #StackBottomPtr]
ldr := m.allocateInstr()
ldr.asULoad(operandNR(tmp2), addressMode{
amode := m.amodePool.Allocate()
*amode = addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: x0VReg, // execution context is always the first argument.
imm: wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(),
}, 64)
}
ldr.asULoad(tmp2, amode, 64)
cur = linkInstr(cur, ldr)

// subs xzr, tmp, tmp2
subs := m.allocateInstr()
subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true)
subs.asALU(aluOpSubS, xzrVReg, operandNR(tmpRegVReg), operandNR(tmp2), true)
cur = linkInstr(cur, subs)

// b.ge #imm
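This hunk and the next replace addressMode literals embedded in instructions with pointers handed out by m.amodePool, filled in place with a whole-struct assignment. A toy version of the allocate-then-fill idiom, with a simplified addressMode and pool (the real pool keeps pointers stable across many allocations, as sketched earlier):

package main

import "fmt"

type addressMode struct {
	kind int
	imm  int64
}

type amodePool struct{ items []addressMode }

// Allocate is a stand-in for m.amodePool.Allocate; fine for a demo with a
// single allocation, though append would move earlier items in general.
func (p *amodePool) Allocate() *addressMode {
	p.items = append(p.items, addressMode{})
	return &p.items[len(p.items)-1]
}

func main() {
	var p amodePool
	amode := p.Allocate()
	*amode = addressMode{kind: 1, imm: 64} // overwrite wholesale, no new heap object
	fmt.Println(*amode)
}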
@ -388,22 +390,25 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi
// First load the requiredStackSize into the temporary register,
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
setRequiredStackSize := m.allocateInstr()
setRequiredStackSize.asStore(operandNR(tmpRegVReg),
addressMode{
kind: addressModeKindRegUnsignedImm12,
// Execution context is always the first argument.
rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
}, 64)
amode := m.amodePool.Allocate()
*amode = addressMode{
kind: addressModeKindRegUnsignedImm12,
// Execution context is always the first argument.
rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
}
setRequiredStackSize.asStore(operandNR(tmpRegVReg), amode, 64)

cur = linkInstr(cur, setRequiredStackSize)
}

ldrAddress := m.allocateInstr()
ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{
amode2 := m.amodePool.Allocate()
*amode2 = addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: x0VReg, // execution context is always the first argument
imm: wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(),
}, 64)
}
ldrAddress.asULoad(tmpRegVReg, amode2, 64)
cur = linkInstr(cur, ldrAddress)

// Then jumps to the stack grow call sequence's address, meaning
@ -91,7 +91,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft
}

offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
var amode addressMode
var amode *addressMode
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
store := m.allocateInstr()
store.asStore(operandNR(v), amode, typ.Bits())
@ -116,16 +116,16 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af
}

offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
var amode addressMode
var amode *addressMode
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
load := m.allocateInstr()
switch typ {
case ssa.TypeI32, ssa.TypeI64:
load.asULoad(operandNR(v), amode, typ.Bits())
load.asULoad(v, amode, typ.Bits())
case ssa.TypeF32, ssa.TypeF64:
load.asFpuLoad(operandNR(v), amode, typ.Bits())
load.asFpuLoad(v, amode, typ.Bits())
case ssa.TypeV128:
load.asFpuLoad(operandNR(v), amode, 128)
load.asFpuLoad(v, amode, 128)
default:
panic("TODO")
}
12
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go
generated
vendored
@ -35,7 +35,7 @@ type (
iter int
reversePostOrderBlocks []RegAllocBlock[I, m]
// labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks.
labelToRegAllocBlockIndex map[Label]int
labelToRegAllocBlockIndex [] /* Label to */ int
loopNestingForestRoots []ssa.BasicBlock
}

@ -56,10 +56,9 @@ type (
// NewRegAllocFunction returns a new RegAllocFunction.
func NewRegAllocFunction[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](m M, ssb ssa.Builder, c Compiler) *RegAllocFunction[I, M] {
return &RegAllocFunction[I, M]{
m: m,
ssb: ssb,
c: c,
labelToRegAllocBlockIndex: make(map[Label]int),
m: m,
ssb: ssb,
c: c,
}
}

@ -74,6 +73,9 @@ func (f *RegAllocFunction[I, M]) AddBlock(sb ssa.BasicBlock, l Label, begin, end
end: end,
id: int(sb.ID()),
})
if len(f.labelToRegAllocBlockIndex) <= int(l) {
f.labelToRegAllocBlockIndex = append(f.labelToRegAllocBlockIndex, make([]int, int(l)-len(f.labelToRegAllocBlockIndex)+1)...)
}
f.labelToRegAllocBlockIndex[l] = i
}

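AddBlock grows the slice on demand with the append(make(...)) idiom so that index l is addressable before the assignment. A generic version of the same helper (the generic shape is an assumption for illustration; the vendored code inlines it for []int):

package main

import "fmt"

// growTo pads s with zero values until index idx is in range.
func growTo[T any](s []T, idx int) []T {
	if n := idx + 1 - len(s); n > 0 {
		s = append(s, make([]T, n)...)
	}
	return s
}

func main() {
	var labelToIndex []int
	labelToIndex = growTo(labelToIndex, 7)
	labelToIndex[7] = 3
	fmt.Println(len(labelToIndex), labelToIndex[7]) // 8 3
}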
140
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
generated
vendored
@ -60,9 +60,8 @@ type (
phiDefInstListPool wazevoapi.Pool[phiDefInstList]

// Followings are re-used during various places.
blks []Block
reals []RealReg
currentOccupants regInUseSet
blks []Block
reals []RealReg

// Following two fields are updated while iterating the blocks in the reverse postorder.
state state
@ -755,7 +754,8 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
killSet := a.reals[:0]

// Gather the set of registers that will be used in the current instruction.
for _, use := range instr.Uses(&a.vs) {
uses := instr.Uses(&a.vs)
for _, use := range uses {
if use.IsRealReg() {
r := use.RealReg()
currentUsedSet = currentUsedSet.add(r)
@ -770,7 +770,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
}
}

for i, use := range instr.Uses(&a.vs) {
for i, use := range uses {
if !use.IsRealReg() {
vs := s.getVRegState(use.ID())
killed := vs.lastUse == pc
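The two hunks above hoist instr.Uses(&a.vs) into a local uses slice: the method fills the allocator's shared scratch buffer, so calling it once and ranging over that snapshot twice avoids rebuilding the buffer mid-iteration. A toy model of the fill-scratch-and-return shape (the real Instr interface differs):

package main

import "fmt"

type instr struct{ operands []int }

// Uses fills the caller's scratch slice and returns it, mimicking the
// allocator's convention of reusing one buffer across instructions.
func (i *instr) Uses(scratch *[]int) []int {
	*scratch = append((*scratch)[:0], i.operands...)
	return *scratch
}

func main() {
	ins := &instr{operands: []int{1, 2, 3}}
	var scratch []int
	uses := ins.Uses(&scratch) // call once, reuse the snapshot below
	for _, u := range uses {
		fmt.Println("first pass:", u)
	}
	for i := range uses { // second loop must not refill the buffer
		fmt.Println("second pass:", uses[i])
	}
}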
@ -944,8 +944,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) {
s := &a.state

for i := 0; i < 64; i++ {
allocated := RealReg(i)
for allocated := RealReg(0); allocated < 64; allocated++ {
if allocated == addrReg { // If this is the call indirect, we should not touch the addr register.
continue
}
@ -974,11 +973,10 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
bID := blk.ID()
blkSt := a.getOrAllocateBlockState(bID)
desiredOccupants := &blkSt.startRegs
aliveOnRegVRegs := make(map[VReg]RealReg)
for i := 0; i < 64; i++ {
r := RealReg(i)
if v := blkSt.startRegs.get(r); v.Valid() {
aliveOnRegVRegs[v] = r
var desiredOccupantsSet RegSet
for i, v := range desiredOccupants {
if v != VRegInvalid {
desiredOccupantsSet = desiredOccupantsSet.add(RealReg(i))
}
}

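fixMergeState now summarizes which registers are occupied at block entry as a RegSet bitmask built once up front, rather than a map keyed by virtual register. A runnable miniature of the bitmask-set pattern (the RealReg/RegSet shapes mirror regset.go; the demo values are made up):

package main

import "fmt"

type RealReg uint8

type RegSet uint64

func (rs RegSet) add(r RealReg) RegSet {
	if r >= 64 {
		return rs
	}
	return rs | 1<<r
}

func (rs RegSet) has(r RealReg) bool { return r < 64 && rs&(1<<r) != 0 }

func main() {
	var occupied RegSet
	for _, r := range []RealReg{1, 3} { // registers holding live values at entry
		occupied = occupied.add(r)
	}
	fmt.Println(occupied.has(1), occupied.has(2)) // true false
}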
@ -987,56 +985,38 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
}

s.currentBlockID = bID
a.updateLiveInVRState(a.getOrAllocateBlockState(bID))
a.updateLiveInVRState(blkSt)

currentOccupants := &a.currentOccupants
for i := 0; i < preds; i++ {
currentOccupants.reset()
if i == blkSt.startFromPredIndex {
continue
}

currentOccupantsRev := make(map[VReg]RealReg)
pred := blk.Pred(i)
predSt := a.getOrAllocateBlockState(pred.ID())
for ii := 0; ii < 64; ii++ {
r := RealReg(ii)
if v := predSt.endRegs.get(r); v.Valid() {
if _, ok := aliveOnRegVRegs[v]; !ok {
continue
}
currentOccupants.add(r, v)
currentOccupantsRev[v] = r
}
}

s.resetAt(predSt)

// Finds the free registers if any.
intTmp, floatTmp := VRegInvalid, VRegInvalid
if intFree := s.findAllocatable(
a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupants.set,
a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupantsSet,
); intFree != RealRegInvalid {
intTmp = FromRealReg(intFree, RegTypeInt)
}
if floatFree := s.findAllocatable(
a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupants.set,
a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupantsSet,
); floatFree != RealRegInvalid {
floatTmp = FromRealReg(floatFree, RegTypeFloat)
}

if wazevoapi.RegAllocLoggingEnabled {
fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
}

for ii := 0; ii < 64; ii++ {
r := RealReg(ii)
for r := RealReg(0); r < 64; r++ {
desiredVReg := desiredOccupants.get(r)
if !desiredVReg.Valid() {
continue
}

currentVReg := currentOccupants.get(r)
currentVReg := s.regsInUse.get(r)
if desiredVReg.ID() == currentVReg.ID() {
continue
}
@ -1048,86 +1028,95 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
} else {
tmpRealReg = floatTmp
}
a.reconcileEdge(f, r, pred, currentOccupants, currentOccupantsRev, currentVReg, desiredVReg, tmpRealReg, typ)
a.reconcileEdge(f, r, pred, currentVReg, desiredVReg, tmpRealReg, typ)
}
}
}

// reconcileEdge reconciles the register state between the current block and the predecessor for the real register `r`.
//
// - currentVReg is the current VReg value that sits on the register `r`. This can be VRegInvalid if the register is not used at the end of the predecessor.
// - desiredVReg is the desired VReg value that should be on the register `r`.
// - freeReg is the temporary register that can be used to swap the values, which may or may not be used.
// - typ is the register type of the `r`.
func (a *Allocator) reconcileEdge(f Function,
r RealReg,
pred Block,
currentOccupants *regInUseSet,
currentOccupantsRev map[VReg]RealReg,
currentVReg, desiredVReg VReg,
freeReg VReg,
typ RegType,
) {
// There are four cases to consider:
// 1. currentVReg is valid, but desiredVReg is on the stack.
// 2. Both currentVReg and desiredVReg are valid.
// 3. Desired is on a different register than `r` and currentReg is not valid.
// 4. Desired is on the stack and currentReg is not valid.

s := &a.state
if currentVReg.Valid() {
// Both are on reg.
er, ok := currentOccupantsRev[desiredVReg]
if !ok {
desiredState := s.getVRegState(desiredVReg.ID())
er := desiredState.r
if er == RealRegInvalid {
// Case 1: currentVReg is valid, but desiredVReg is on the stack.
if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n",
desiredVReg.ID(), a.regInfo.RealRegName(r),
)
}
// This case is that the desired value is on the stack, but currentVReg is on the target register.
// We need to move the current value to the stack, and reload the desired value.
// We need to move the current value to the stack, and reload the desired value into the register.
// TODO: we can do better here.
f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion())
delete(currentOccupantsRev, currentVReg)
s.releaseRealReg(r)

s.getVRegState(desiredVReg.ID()).recordReload(f, pred)
f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
currentOccupants.add(r, desiredVReg)
currentOccupantsRev[desiredVReg] = r
s.useRealReg(r, desiredVReg)
return
}

if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n",
desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er),
} else {
// Case 2: Both currentVReg and desiredVReg are valid.
if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n",
desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er),
)
}
// This case, we need to swap the values between the current and desired values.
f.SwapBefore(
currentVReg.SetRealReg(r),
desiredVReg.SetRealReg(er),
freeReg,
pred.LastInstrForInsertion(),
)
}
f.SwapBefore(
currentVReg.SetRealReg(r),
desiredVReg.SetRealReg(er),
freeReg,
pred.LastInstrForInsertion(),
)
s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())
currentOccupantsRev[desiredVReg] = r
currentOccupantsRev[currentVReg] = er
currentOccupants.add(r, desiredVReg)
currentOccupants.add(er, currentVReg)
if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))
s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())
s.releaseRealReg(r)
s.releaseRealReg(er)
s.useRealReg(r, desiredVReg)
s.useRealReg(er, currentVReg)
if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))
}
}
} else {
// Desired is on reg, but currently the target register is not used.
if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("\t\tv%d is desired to be on %s, current not used\n",
desiredVReg.ID(), a.regInfo.RealRegName(r),
)
}
if currentReg, ok := currentOccupantsRev[desiredVReg]; ok {
if currentReg := s.getVRegState(desiredVReg.ID()).r; currentReg != RealRegInvalid {
// Case 3: Desired is on a different register than `r` and currentReg is not valid.
// We simply need to move the desired value to the register.
f.InsertMoveBefore(
FromRealReg(r, typ),
desiredVReg.SetRealReg(currentReg),
pred.LastInstrForInsertion(),
)
currentOccupants.remove(currentReg)
s.releaseRealReg(currentReg)
} else {
// Case 4: Both currentVReg and desiredVReg are not valid.
// We simply need to reload the desired value into the register.
s.getVRegState(desiredVReg.ID()).recordReload(f, pred)
f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
}
currentOccupantsRev[desiredVReg] = r
currentOccupants.add(r, desiredVReg)
}

if wazevoapi.RegAllocLoggingEnabled {
fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
s.useRealReg(r, desiredVReg)
}
}

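The rewritten reconcileEdge drops the per-edge map and reverse table and reads everything from the allocator state, but the four cases survive intact. A toy model of the control flow, using strings for virtual values and a map for machine registers (illustrative only; the real code also records spills, reloads, and the allocated-register set):

package main

import "fmt"

// reconcile forces `desired` into register r. regs[i] == "" means empty.
func reconcile(regs map[int]string, r int, desired string) {
	current := regs[r]
	src := -1 // register currently holding desired, if any
	for i, v := range regs {
		if v == desired {
			src = i
		}
	}
	switch {
	case current != "" && src == -1: // case 1: evict current, reload desired
		fmt.Printf("store %s; reload %s into r%d\n", current, desired, r)
		regs[r] = desired
	case current != "": // case 2: both live, swap the two registers
		fmt.Printf("swap r%d and r%d\n", r, src)
		regs[r], regs[src] = desired, current
	case src != -1: // case 3: r is free, move desired over
		fmt.Printf("move %s from r%d to r%d\n", desired, src, r)
		regs[r], regs[src] = desired, ""
	default: // case 4: r is free and desired is on the stack
		fmt.Printf("reload %s into r%d\n", desired, r)
		regs[r] = desired
	}
}

func main() {
	regs := map[int]string{0: "a", 1: "b"}
	reconcile(regs, 0, "b") // case 2
	fmt.Println(regs)       // map[0:b 1:a]
}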
@ -1169,8 +1158,7 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) {
}
for pos != definingBlk {
st := a.getOrAllocateBlockState(pos.ID())
for ii := 0; ii < 64; ii++ {
rr := RealReg(ii)
for rr := RealReg(0); rr < 64; rr++ {
if st.startRegs.get(rr) == v {
r = rr
// Already in the register, so we can place the spill at the beginning of the block.
44
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
generated
vendored
@ -46,23 +46,24 @@ func (rs RegSet) Range(f func(allocatedRealReg RealReg)) {
}
}

type regInUseSet struct {
set RegSet
vrs [64]VReg
}
type regInUseSet [64]VReg

func newRegInUseSet() regInUseSet {
var ret regInUseSet
ret.reset()
return ret
}

func (rs *regInUseSet) reset() {
rs.set = 0
for i := range rs.vrs {
rs.vrs[i] = VRegInvalid
for i := range rs {
rs[i] = VRegInvalid
}
}

func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
var ret []string
for i := 0; i < 64; i++ {
if rs.set&(1<<uint(i)) != 0 {
vr := rs.vrs[i]
for i, vr := range rs {
if vr != VRegInvalid {
ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.ID()))
}
}
@ -70,39 +71,28 @@ func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
}

func (rs *regInUseSet) has(r RealReg) bool {
if r >= 64 {
return false
}
return rs.set&(1<<uint(r)) != 0
return r < 64 && rs[r] != VRegInvalid
}

func (rs *regInUseSet) get(r RealReg) VReg {
if r >= 64 {
return VRegInvalid
}
return rs.vrs[r]
return rs[r]
}

func (rs *regInUseSet) remove(r RealReg) {
if r >= 64 {
return
}
rs.set &= ^(1 << uint(r))
rs.vrs[r] = VRegInvalid
rs[r] = VRegInvalid
}

func (rs *regInUseSet) add(r RealReg, vr VReg) {
if r >= 64 {
return
}
rs.set |= 1 << uint(r)
rs.vrs[r] = vr
rs[r] = vr
}

func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) {
for i := 0; i < 64; i++ {
if rs.set&(1<<uint(i)) != 0 {
f(RealReg(i), rs.vrs[i])
for i, vr := range rs {
if vr != VRegInvalid {
f(RealReg(i), vr)
}
}
}
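The regInUseSet refactor above collapses the old (bitmask, array) pair into a single [64]VReg: the VRegInvalid sentinel now encodes emptiness, so membership, lookup, insert, and delete are each one array access. A scaled-down runnable version (eight slots and an int-based vReg keep the demo short):

package main

import "fmt"

type vReg int32

const vRegInvalid vReg = -1

// regInUse mirrors the refactored regInUseSet: the sentinel doubles as the
// presence bit the separate RegSet used to track.
type regInUse [8]vReg

func (rs *regInUse) reset() {
	for i := range rs {
		rs[i] = vRegInvalid
	}
}

func (rs *regInUse) add(r int, v vReg) { rs[r] = v }
func (rs *regInUse) has(r int) bool    { return rs[r] != vRegInvalid }

func main() {
	var rs regInUse
	rs.reset()
	rs.add(3, 42)
	fmt.Println(rs.has(3), rs.has(4)) // true false
}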