/**
 * This file has no copyright assigned and is placed in the Public Domain.
 * This file is part of the mingw-w64 runtime package.
 * No warranty is given; refer to the file DISCLAIMER.PD within this package.
 */
#include <_mingw_mac.h>

	.file	"ceil.S"
	.text
	.align 4
	.globl __MINGW_USYMBOL(ceil)
	.def	__MINGW_USYMBOL(ceil);	.scl	2;	.type	32;	.endef

#ifdef __x86_64__
	.seh_proc __MINGW_USYMBOL(ceil)
#endif

__MINGW_USYMBOL(ceil):
#if defined(_AMD64_) || defined(__x86_64__)
	.seh_endprologue
	/* double ceil(double x) — Microsoft x64 ABI: x in xmm0, result in xmm0.
	   Operates on the raw IEEE-754 bit pattern in integer registers.  */
	movd	%xmm0, %rax			/* rax = raw bits of x */
	movq	%rax, %rcx
	sarq	$52, %rcx			/* shift exponent field down */
	andl	$2047, %ecx			/* ecx = biased exponent (11 bits) */
	subl	$1023, %ecx			/* ecx = unbiased exponent */
	cmpl	$51, %ecx
	jg	.is_intnaninf			/* exponent > 51: integral, NaN or Inf */
	/* Is x zero? */
	testq	%rax, %rax
	je	.ret_org			/* +0.0 is returned unchanged */
	/* Is x signed? */
	testl	%ecx, %ecx
	js	.signed_val			/* exponent < 0  =>  0 < |x| < 1 */
	/* Is x integral? */
	movabsq	$4503599627370495, %rdx		/* (1 << 52) - 1: full mantissa mask */
	sarq	%cl, %rdx			/* rdx = mask of the fractional mantissa bits */
	testq	%rax, %rdx
	je	.ret_org			/* no fraction bits set: already integral */
	addsd	.huge(%rip), %xmm0		/* x + 1e300: forces the inexact flag */
	ucomisd	.zero(%rip), %xmm0
	jbe	.doret				/* guard: sum <= 0 or unordered — return x's bits */
	testq	%rax, %rax
	jle	.l1				/* x < 0: truncation toward zero IS ceil */
	/* inexact ... */
	movabsq	$4503599627370496, %r8		/* 1 << 52 */
	shrq	%cl, %r8			/* = one unit at the integer position */
	addq	%r8, %rax			/* bump the integer part up by one */
.l1:
	notq	%rdx
	andq	%rdx, %rax			/* clear the fractional mantissa bits */
.doret:
	movd	%rax, %xmm0
	ret

	.p2align 4,,10
.signed_val:
	/* Here 0 < |x| < 1: ceil is 1.0 for positive x, -0.0 for negative x.  */
	addsd	.huge(%rip), %xmm0		/* force the inexact flag */
	ucomisd	.zero(%rip), %xmm0
	jbe	.doret2				/* guard — return x's bits unchanged */
	testq	%rax, %rax			/* SF = sign of x */
	movabsq	$4607182418800017408, %rdx	/* bit pattern of 1.0 */
	movabsq	$-9223372036854775808, %rax	/* bit pattern of -0.0 */
	cmovns	%rdx, %rax			/* non-negative x -> 1.0, else keep -0.0 */
	.p2align 4,,10
.doret2:
	movd	%rax, %xmm0
	ret

	.p2align 4,,10
.is_intnaninf:
	/* Is Nan or Inf? */
	cmpl	$1024, %ecx			/* all-ones exponent field: NaN or Inf */
	je	.ret_naninf
	.p2align 4,,10
.ret_org:
	/* return x. */
	rep					/* "rep ret" avoids an AMD branch-predictor stall */
	ret
	.p2align 4,,10
.ret_naninf:
	/* return x + x; (quiets a signaling NaN, leaves Inf unchanged) */
	addsd	%xmm0, %xmm0
	ret
	.seh_endproc

/* local data. */
	.section .rdata,"dr"
	.align 8
.huge:
	/* 1.0e300 (bits 0x7E37E43C8800759C), stored little endian */
	.long	-2013235812
	.long	2117592124
	.align 8
.zero:
	/* 0.0 */
	.long	0
	.long	0
#elif defined(_ARM_) || defined(__arm__)
	/* double ceil(double x) — x in d0, result in d0 (VFP).
	   Temporarily force the FPSCR rounding mode to round-towards-plus-
	   infinity, round via an int32 conversion, then restore FPSCR.
	   NOTE(review): vcvtr.s32.f64 saturates for |x| >= 2^31 — confirm
	   how large-magnitude arguments are expected to behave here.  */
	vmrs	r1, fpscr
	bic	r0, r1, #0x00c00000		/* clear RMode (bits 23:22) */
	orr	r0, r0, #0x00400000		/* Round towards Plus Infinity */
	vmsr	fpscr, r0
	vcvtr.s32.f64	s0, d0			/* round to int32 using current FPSCR mode */
	vcvt.f64.s32	d0, s0			/* back to double (exact) */
	vmsr	fpscr, r1			/* restore the caller's FPSCR */
	bx	lr
#elif defined(_X86_) || defined(__i386__)
	/* double ceil(double x) — x at 4(%esp), result in st(0).
	   Round with the x87 control word forced to round-towards-+inf.  */
	fldl	4(%esp)
	subl	$8,%esp

	fstcw	4(%esp)			/* store fpu control word */

	/* We use here %edx although only the low 16 bits are defined.
	   But none of the operations should care and they are faster
	   than the 16 bit operations. */
	movl	$0x0800,%edx		/* round towards +oo */
	orl	4(%esp),%edx
	andl	$0xfbff,%edx		/* RC field (bits 11:10) = 10b: round up */
	movl	%edx,(%esp)
	fldcw	(%esp)			/* load modified control word */

	frndint				/* round */

	fldcw	4(%esp)			/* restore original control word */

	addl	$8,%esp
	ret
#endif
|