547 lines
11 KiB
C
547 lines
11 KiB
C
|
/*
 * Copyright (C) 2016 Giacomo Tesio <giacomo@tesio.it>
 *
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
|
||
|
|
||
|
/*
 * SIMD Floating Point.
 * Assembler support to get at the individual instructions
 * is in l64fpu.s.
 * There are opportunities to be lazier about saving and
 * restoring the state and allocating the storage needed.
 */
|
||
|
#include "u.h"
|
||
|
#include "../port/lib.h"
|
||
|
#include "mem.h"
|
||
|
#include "dat.h"
|
||
|
#include "fns.h"
|
||
|
|
||
|
#include "amd64.h"
|
||
|
#include "ureg.h"
|
||
|
|
||
|
/*
 * Exception bits shared by FCW, FSW and MXCSR.
 * The same six low-order bit positions are used in all three
 * registers, so one set of names serves for each of them.
 */
enum {						/* FCW, FSW and MXCSR */
	I		= 0x00000001,		/* Invalid-Operation */
	D		= 0x00000002,		/* Denormalized-Operand */
	Z		= 0x00000004,		/* Zero-Divide */
	O		= 0x00000008,		/* Overflow */
	U		= 0x00000010,		/* Underflow */
	P		= 0x00000020,		/* Precision */
};
|
||
|
|
||
|
/*
 * x87 control word (FCW) fields above the exception mask bits:
 * precision control and rounding control.
 */
enum {						/* FCW */
	PCs		= 0x00000000,		/* Precision Control -Single */
	PCd		= 0x00000200,		/* -Double */
	PCde		= 0x00000300,		/* -Double Extended */
	RCn		= 0x00000000,		/* Rounding Control -Nearest */
	RCd		= 0x00000400,		/* -Down */
	RCu		= 0x00000800,		/* -Up */
	RCz		= 0x00000C00,		/* -Toward Zero */
};
|
||
|
|
||
|
/*
 * x87 status word (FSW) bits above the six exception flags.
 * C0, C2 and C3 are annotated with the EFLAGS bit they correspond to.
 */
enum {						/* FSW */
	Sff		= 0x00000040,		/* Stack Fault Flag */
	Es		= 0x00000080,		/* Error Summary Status */
	C0		= 0x00000100,		/* CF - Condition Code Bits */
	C1		= 0x00000200,		/* O/U# */
	C2		= 0x00000400,		/* PF */
	C3		= 0x00004000,		/* ZF */
	B		= 0x00008000,		/* Busy */
};
|
||
|
|
||
|
/*
 * MXCSR bits above the six exception flags: denormal handling,
 * the per-exception mask bits, rounding mode and flush-to-zero.
 */
enum {						/* MXCSR */
	Daz		= 0x00000040,		/* Denormals are Zeros */
	Im		= 0x00000080,		/* I Mask */
	Dm		= 0x00000100,		/* D Mask */
	Zm		= 0x00000200,		/* Z Mask */
	Om		= 0x00000400,		/* O Mask */
	Um		= 0x00000800,		/* U Mask */
	Pm		= 0x00001000,		/* P Mask */
	Rn		= 0x00000000,		/* Round to Nearest */
	Rd		= 0x00002000,		/* Round Down */
	Ru		= 0x00004000,		/* Round Up */
	Rz		= 0x00006000,		/* Round toward Zero */
	Fz		= 0x00008000,		/* Flush to Zero for Um */
};
|
||
|
|
||
|
/*
 * Values kept in Proc FPU.fpustate.
 * Init, Busy and Idle are mutually exclusive states; Hold is a
 * separate flag bit that fpunotify() ORs in and fpunoted() clears.
 */
enum {						/* FPU.state */
	Init		= 0,			/* The FPU has not been used */
	Busy		= 1,			/* The FPU is being used */
	Idle		= 2,			/* The FPU has been used */

	Hold		= 4,			/* Handling an FPU note */
};
|
||
|
|
||
|
extern void _clts(void);
|
||
|
extern void _fldcw(uint16_t*);
|
||
|
extern void _fnclex(void);
|
||
|
extern void _fninit(void);
|
||
|
extern void _fxrstor(Fxsave*);
|
||
|
extern void _fxsave(Fxsave*);
|
||
|
extern void _fwait(void);
|
||
|
extern void _ldmxcsr(uint32_t*);
|
||
|
extern void _stts(void);
|
||
|
|
||
|
int
|
||
|
fpudevprocio(Proc* proc, void* a, int32_t n, uintptr_t offset, int write)
|
||
|
{
|
||
|
uint8_t *p;
|
||
|
|
||
|
/*
|
||
|
* Called from procdevtab.read and procdevtab.write
|
||
|
* allow user process access to the FPU registers.
|
||
|
* This is the only FPU routine which is called directly
|
||
|
* from the port code; it would be nice to have dynamic
|
||
|
* creation of entries in the device file trees...
|
||
|
*/
|
||
|
if(offset >= sizeof(Fxsave))
|
||
|
return 0;
|
||
|
if((p = proc->FPU.fpusave) == nil)
|
||
|
return 0;
|
||
|
switch(write){
|
||
|
default:
|
||
|
if(offset+n > sizeof(Fxsave))
|
||
|
n = sizeof(Fxsave) - offset;
|
||
|
memmove(p+offset, a, n);
|
||
|
break;
|
||
|
case 0:
|
||
|
if(offset+n > sizeof(Fxsave))
|
||
|
n = sizeof(Fxsave) - offset;
|
||
|
memmove(a, p+offset, n);
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
return n;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fpunotify(Ureg* u)
|
||
|
{
|
||
|
/*
|
||
|
* Called when a note is about to be delivered to a
|
||
|
* user process, usually at the end of a system call.
|
||
|
* Note handlers are not allowed to use the FPU so
|
||
|
* the state is marked (after saving if necessary) and
|
||
|
* checked in the Device Not Available handler.
|
||
|
*/
|
||
|
if(up->FPU.fpustate == Busy){
|
||
|
_fxsave(up->FPU.fpusave);
|
||
|
_stts();
|
||
|
up->FPU.fpustate = Idle;
|
||
|
}
|
||
|
up->FPU.fpustate |= Hold;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fpunoted(void)
|
||
|
{
|
||
|
/*
|
||
|
* Called from sysnoted() via the machine-dependent
|
||
|
* noted() routine.
|
||
|
* Clear the flag set above in fpunotify().
|
||
|
*/
|
||
|
up->FPU.fpustate &= ~Hold;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fpusysrfork(Ureg* u)
|
||
|
{
|
||
|
/*
|
||
|
* Called early in the non-interruptible path of
|
||
|
* sysrfork() via the machine-dependent syscall() routine.
|
||
|
* Save the state so that it can be easily copied
|
||
|
* to the child process later.
|
||
|
*/
|
||
|
if(up->FPU.fpustate != Busy)
|
||
|
return;
|
||
|
|
||
|
_fxsave(up->FPU.fpusave);
|
||
|
_stts();
|
||
|
up->FPU.fpustate = Idle;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fpusysrforkchild(Proc* child, Proc* parent)
|
||
|
{
|
||
|
/*
|
||
|
* Called later in sysrfork() via the machine-dependent
|
||
|
* sysrforkchild() routine.
|
||
|
* Copy the parent FPU state to the child.
|
||
|
*/
|
||
|
child->FPU.fpustate = parent->FPU.fpustate;
|
||
|
child->FPU.fpusave = (void*)((PTR2UINT(up->FPU.fxsave) + 15) & ~15);
|
||
|
if(child->FPU.fpustate == Init)
|
||
|
return;
|
||
|
|
||
|
memmove(child->FPU.fpusave, parent->FPU.fpusave, sizeof(Fxsave));
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fpuprocsave(Proc* p)
|
||
|
{
|
||
|
/*
|
||
|
* Called from sched() and sleep() via the machine-dependent
|
||
|
* procsave() routine.
|
||
|
* About to go in to the scheduler.
|
||
|
* If the process wasn't using the FPU
|
||
|
* there's nothing to do.
|
||
|
*/
|
||
|
if(p->FPU.fpustate != Busy)
|
||
|
return;
|
||
|
|
||
|
/*
|
||
|
* The process is dead so clear and disable the FPU
|
||
|
* and set the state for whoever gets this proc struct
|
||
|
* next.
|
||
|
*/
|
||
|
if(p->state == Moribund){
|
||
|
_clts();
|
||
|
_fnclex();
|
||
|
_stts();
|
||
|
p->FPU.fpustate = Init;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Save the FPU state without handling pending
|
||
|
* unmasked exceptions and disable. Postnote() can't
|
||
|
* be called here as sleep() already has up->rlock,
|
||
|
* so the handling of pending exceptions is delayed
|
||
|
* until the process runs again and generates a
|
||
|
* Device Not Available exception fault to activate
|
||
|
* the FPU.
|
||
|
*/
|
||
|
_fxsave(p->FPU.fpusave);
|
||
|
_stts();
|
||
|
p->FPU.fpustate = Idle;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fpuprocrestore(Proc* p)
|
||
|
{
|
||
|
/*
|
||
|
* The process has been rescheduled and is about to run.
|
||
|
* Nothing to do here right now. If the process tries to use
|
||
|
* the FPU again it will cause a Device Not Available
|
||
|
* exception and the state will then be restored.
|
||
|
*/
|
||
|
USED(p);
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fpusysprocsetup(Proc* p)
|
||
|
{
|
||
|
/*
|
||
|
* Disable the FPU.
|
||
|
* Called from sysexec() via sysprocsetup() to
|
||
|
* set the FPU for the new process.
|
||
|
*/
|
||
|
if(p->FPU.fpustate != Init){
|
||
|
_clts();
|
||
|
_fnclex();
|
||
|
_stts();
|
||
|
p->FPU.fpustate = Init;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
acfpusysprocsetup(Proc *p)
|
||
|
{
|
||
|
if(p->FPU.fpustate == Init){
|
||
|
/* The FPU is initialized in the TC but we must initialize
|
||
|
* it in the AC.
|
||
|
*/
|
||
|
p->FPU.fpustate = Idle;
|
||
|
fpusysprocsetup(p);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static char*
|
||
|
fpunote(void)
|
||
|
{
|
||
|
uint16_t fsw;
|
||
|
Fxsave *fpusave;
|
||
|
char *cm;
|
||
|
|
||
|
/*
|
||
|
* The Sff bit is sticky, meaning it should be explicitly
|
||
|
* cleared or there's no way to tell if the exception was an
|
||
|
* invalid operation or a stack fault.
|
||
|
*/
|
||
|
fpusave = up->FPU.fpusave;
|
||
|
fsw = (fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I);
|
||
|
if(fsw & I){
|
||
|
if(fsw & Sff){
|
||
|
if(fsw & C1)
|
||
|
cm = "Stack Overflow";
|
||
|
else
|
||
|
cm = "Stack Underflow";
|
||
|
}
|
||
|
else
|
||
|
cm = "Invalid Operation";
|
||
|
}
|
||
|
else if(fsw & D)
|
||
|
cm = "Denormal Operand";
|
||
|
else if(fsw & Z)
|
||
|
cm = "Divide-By-Zero";
|
||
|
else if(fsw & O)
|
||
|
cm = "Numeric Overflow";
|
||
|
else if(fsw & U)
|
||
|
cm = "Numeric Underflow";
|
||
|
else if(fsw & P)
|
||
|
cm = "Precision";
|
||
|
else
|
||
|
cm = "Unknown";
|
||
|
|
||
|
snprint(up->genbuf, sizeof(up->genbuf),
|
||
|
"sys: fp: %s Exception ipo=%#llux fsw=%#ux",
|
||
|
cm, fpusave->rip, fsw);
|
||
|
return up->genbuf;
|
||
|
}
|
||
|
|
||
|
char*
|
||
|
xfpuxf(Ureg* ureg, void* v)
|
||
|
{
|
||
|
uint32_t mxcsr;
|
||
|
Fxsave *fpusave;
|
||
|
char *cm;
|
||
|
|
||
|
/*
|
||
|
* #XF - SIMD Floating Point Exception (Vector 18).
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* Save FPU state to check out the error.
|
||
|
*/
|
||
|
fpusave = up->FPU.fpusave;
|
||
|
_fxsave(fpusave);
|
||
|
_stts();
|
||
|
up->FPU.fpustate = Idle;
|
||
|
|
||
|
if(ureg->ip & KZERO)
|
||
|
panic("#MF: ip=%#p", ureg->ip);
|
||
|
|
||
|
/*
|
||
|
* Notify the user process.
|
||
|
* The path here is similar to the x87 path described
|
||
|
* in fpupostnote above but without the fpupostnote()
|
||
|
* call.
|
||
|
*/
|
||
|
mxcsr = fpusave->mxcsr;
|
||
|
if((mxcsr & (Im|I)) == I)
|
||
|
cm = "Invalid Operation";
|
||
|
else if((mxcsr & (Dm|D)) == D)
|
||
|
cm = "Denormal Operand";
|
||
|
else if((mxcsr & (Zm|Z)) == Z)
|
||
|
cm = "Divide-By-Zero";
|
||
|
else if((mxcsr & (Om|O)) == O)
|
||
|
cm = "Numeric Overflow";
|
||
|
else if((mxcsr & (Um|U)) == U)
|
||
|
cm = "Numeric Underflow";
|
||
|
else if((mxcsr & (Pm|P)) == P)
|
||
|
cm = "Precision";
|
||
|
else
|
||
|
cm = "Unknown";
|
||
|
|
||
|
snprint(up->genbuf, sizeof(up->genbuf),
|
||
|
"sys: fp: %s Exception mxcsr=%#ux", cm, mxcsr);
|
||
|
return up->genbuf;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fpuxf(Ureg *ureg, void *p)
|
||
|
{
|
||
|
char *n;
|
||
|
|
||
|
n = xfpuxf(ureg, p);
|
||
|
if(n != nil)
|
||
|
postnote(up, 1, n, NDebug);
|
||
|
}
|
||
|
|
||
|
char*
|
||
|
acfpuxf(Ureg *ureg, void *p)
|
||
|
{
|
||
|
return xfpuxf(ureg, p);
|
||
|
}
|
||
|
|
||
|
static char*
|
||
|
xfpumf(Ureg* ureg, void* v)
|
||
|
{
|
||
|
Fxsave *fpusave;
|
||
|
|
||
|
/*
|
||
|
* #MF - x87 Floating Point Exception Pending (Vector 16).
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* Save FPU state to check out the error.
|
||
|
*/
|
||
|
fpusave = up->FPU.fpusave;
|
||
|
_fxsave(fpusave);
|
||
|
_stts();
|
||
|
up->FPU.fpustate = Idle;
|
||
|
|
||
|
if(ureg->ip & KZERO)
|
||
|
panic("#MF: ip=%#p rip=%#p", ureg->ip, fpusave->rip);
|
||
|
|
||
|
/*
|
||
|
* Notify the user process.
|
||
|
* The path here is
|
||
|
* call trap->fpumf->fpupostnote->postnote
|
||
|
* return ->fpupostnote->fpumf->trap
|
||
|
* call notify->fpunotify
|
||
|
* return ->notify
|
||
|
* then either
|
||
|
* call pexit
|
||
|
* or
|
||
|
* return ->trap
|
||
|
* return ->user note handler
|
||
|
*/
|
||
|
return fpunote();
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fpumf(Ureg *ureg, void *p)
|
||
|
{
|
||
|
char *n;
|
||
|
|
||
|
n = xfpumf(ureg, p);
|
||
|
if(n != nil)
|
||
|
postnote(up, 1, n, NDebug);
|
||
|
}
|
||
|
|
||
|
char*
|
||
|
acfpumf(Ureg *ureg, void *p)
|
||
|
{
|
||
|
return xfpumf(ureg, p);
|
||
|
}
|
||
|
|
||
|
static char*
|
||
|
xfpunm(Ureg* ureg, void* v)
|
||
|
{
|
||
|
Fxsave *fpusave;
|
||
|
|
||
|
/*
|
||
|
* #NM - Device Not Available (Vector 7).
|
||
|
*/
|
||
|
if(up == nil)
|
||
|
panic("#NM: fpu in kernel: ip %#p\n", ureg->ip);
|
||
|
|
||
|
/*
|
||
|
* Someone tried to use the FPU in a note handler.
|
||
|
* That's a no-no.
|
||
|
*/
|
||
|
if(up->FPU.fpustate & Hold)
|
||
|
return "sys: floating point in note handler";
|
||
|
|
||
|
if(ureg->ip & KZERO)
|
||
|
panic("#NM: proc %d %s state %d ip %#p\n",
|
||
|
up->pid, up->text, up->FPU.fpustate, ureg->ip);
|
||
|
|
||
|
switch(up->FPU.fpustate){
|
||
|
case Busy:
|
||
|
default:
|
||
|
panic("#NM: state %d ip %#p\n", up->FPU.fpustate, ureg->ip);
|
||
|
break;
|
||
|
case Init:
|
||
|
/*
|
||
|
* A process tries to use the FPU for the
|
||
|
* first time and generates a 'device not available'
|
||
|
* exception.
|
||
|
* Turn the FPU on and initialise it for use.
|
||
|
* Set the precision and mask the exceptions
|
||
|
* we don't care about from the generic Mach value.
|
||
|
*/
|
||
|
_clts();
|
||
|
_fninit();
|
||
|
_fwait();
|
||
|
_fldcw(&m->FPU.fcw);
|
||
|
_ldmxcsr(&m->FPU.mxcsr);
|
||
|
up->FPU.fpusave = (void*)((PTR2UINT(up->FPU.fxsave) + 15) & ~15);
|
||
|
up->FPU.fpustate = Busy;
|
||
|
break;
|
||
|
case Idle:
|
||
|
/*
|
||
|
* Before restoring the state, check for any pending
|
||
|
* exceptions, there's no way to restore the state without
|
||
|
* generating an unmasked exception.
|
||
|
*/
|
||
|
fpusave = up->FPU.fpusave;
|
||
|
if((fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I))
|
||
|
return fpunote();
|
||
|
|
||
|
/*
|
||
|
* Sff is sticky.
|
||
|
*/
|
||
|
fpusave->fcw &= ~Sff;
|
||
|
_clts();
|
||
|
_fxrstor(fpusave);
|
||
|
up->FPU.fpustate = Busy;
|
||
|
break;
|
||
|
}
|
||
|
return nil;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fpunm(Ureg *ureg, void *p)
|
||
|
{
|
||
|
char *n;
|
||
|
|
||
|
n = xfpunm(ureg, p);
|
||
|
if(n != nil)
|
||
|
postnote(up, 1, n, NDebug);
|
||
|
}
|
||
|
|
||
|
char*
|
||
|
acfpunm(Ureg *ureg, void *p)
|
||
|
{
|
||
|
return xfpunm(ureg, p);
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fpuinit(void)
|
||
|
{
|
||
|
uint64_t r;
|
||
|
Fxsave *fxsave;
|
||
|
uint8_t buf[sizeof(Fxsave)+15];
|
||
|
|
||
|
/*
|
||
|
* It's assumed there is an integrated FPU, so Em is cleared;
|
||
|
*/
|
||
|
r = cr0get();
|
||
|
r &= ~(Ts|Em);
|
||
|
r |= Ne|Mp;
|
||
|
cr0put(r);
|
||
|
|
||
|
r = cr4get();
|
||
|
r |= Osxmmexcpt|Osfxsr;
|
||
|
cr4put(r);
|
||
|
|
||
|
_fninit();
|
||
|
fxsave = (Fxsave*)((PTR2UINT(buf) + 15) & ~15);
|
||
|
memset(fxsave, 0, sizeof(Fxsave));
|
||
|
_fxsave(fxsave);
|
||
|
m->FPU.fcw = RCn|PCd|P|U|D;
|
||
|
if(fxsave->mxcsrmask == 0)
|
||
|
m->FPU.mxcsrmask = 0x0000FFBF;
|
||
|
else
|
||
|
m->FPU.mxcsrmask = fxsave->mxcsrmask;
|
||
|
m->FPU.mxcsr = (Rn|Pm|Um|Dm) & m->FPU.mxcsrmask;
|
||
|
_stts();
|
||
|
|
||
|
if(m->machno != 0)
|
||
|
return;
|
||
|
|
||
|
/*
|
||
|
* Set up the exception handlers.
|
||
|
*/
|
||
|
trapenable(IdtNM, fpunm, 0, "#NM");
|
||
|
trapenable(IdtMF, fpumf, 0, "#MF");
|
||
|
trapenable(IdtXF, fpuxf, 0, "#XF");
|
||
|
}
|