newlib/winsup/cygwin/math/ceil.S

125 lines
2.4 KiB
ArmAsm

/**
* This file has no copyright assigned and is placed in the Public Domain.
* This file is part of the mingw-w64 runtime package.
* No warranty is given; refer to the file DISCLAIMER.PD within this package.
*/
#include <_mingw_mac.h>
.file "ceil.S"
.text
.align 4
.globl __MINGW_USYMBOL(ceil)
.def __MINGW_USYMBOL(ceil); .scl 2; .type 32; .endef
/*
 * double ceil (double x)
 *
 * Returns the smallest integral value that is not less than x.
 * Integral values, infinities and NaNs come back as x (the x86_64 path
 * returns x + x for Inf/NaN); the sign of x is kept on a zero result.
 *
 * Per-architecture interface, as used by the code below:
 *   x86_64: x in %xmm0, result in %xmm0.
 *           Clobbers %rax, %rcx, %rdx, %r8 and the flags.
 *   ARM:    x in d0, result in d0 (VFP register argument passing).
 *           Clobbers r0-r3, d1 and the flags; FPSCR is saved and restored.
 *   i386:   x at 4(%esp) on entry, result in %st(0).
 *           Clobbers %edx and the flags; the x87 control word is restored.
 */
#if defined(_AMD64_) || defined(__x86_64__)
.seh_proc __MINGW_USYMBOL(ceil)
#endif
__MINGW_USYMBOL(ceil):
#if defined(_AMD64_) || defined(__x86_64__)
	.seh_endprologue
	/* rax = raw IEEE-754 bits of x, ecx = unbiased exponent. */
	movd %xmm0, %rax
	movq %rax, %rcx
	sarq $52, %rcx
	andl $2047, %ecx
	subl $1023, %ecx
	/* Exponent >= 52: no fraction bits left, or x is Inf/NaN. */
	cmpl $51, %ecx
	jg .is_intnaninf
	/* Is x zero?  (Only +0.0 has an all-zero bit pattern.) */
	testq %rax, %rax
	je .ret_org
	/* Is the exponent negative, i.e. |x| < 1? */
	testl %ecx, %ecx
	js .signed_val
	/* Is x integral?  rdx = mask of the mantissa bits below the units
	   position (2^52 - 1 shifted right by the exponent). */
	movabsq $4503599627370495, %rdx
	sarq %cl, %rdx
	testq %rax, %rdx
	je .ret_org
	/* x + huge is computed and compared with zero.  This clobbers
	   %xmm0, so the result is rebuilt from %rax from here on. */
	addsd .huge(%rip), %xmm0
	ucomisd .zero(%rip), %xmm0
	jbe .doret
	/* Negative x: dropping the fraction bits already rounds up. */
	testq %rax, %rax
	jle .l1
	/* inexact ... positive x: add one unit at the integer position
	   (2^52 shifted right by the exponent) before truncating. */
	movabsq $4503599627370496, %r8
	shrq %cl, %r8
	addq %r8, %rax
.l1:
	/* Clear the fraction bits. */
	notq %rdx
	andq %rdx, %rax
.doret:
	movd %rax, %xmm0
	ret
	.p2align 4,,10
.signed_val:
	/* 0 < |x| < 1, or x is -0.0. */
	addsd .huge(%rip), %xmm0
	ucomisd .zero(%rip), %xmm0
	jbe .doret2
	/* The flags set by testq survive the two movabsq instructions:
	   pick 1.0 for positive x and -0.0 for negative x. */
	testq %rax, %rax
	movabsq $4607182418800017408, %rdx	/* bits of 1.0 */
	movabsq $-9223372036854775808, %rax	/* bits of -0.0 */
	cmovns %rdx, %rax
	.p2align 4,,10
.doret2:
	movd %rax, %xmm0
	ret
	.p2align 4,,10
.is_intnaninf:
	/* Is Nan or Inf?  (Unbiased exponent 1024 = all exponent bits set.) */
	cmpl $1024, %ecx
	je .ret_naninf
	.p2align 4,,10
.ret_org:
	/* return x. */
	rep
	ret
	.p2align 4,,10
.ret_naninf:
	/* return x + x; */
	addsd %xmm0, %xmm0
	ret
.seh_endproc
/* local data. */
.section .rdata,"dr"
.align 8
.huge:
	/* 1.0e300 (0x7E37E43C8800759C), low word first. */
	.long -2013235812
	.long 2117592124
.align 8
.zero:
	.long 0
	.long 0
#elif defined(_ARM_) || defined(__arm__)
	/* A round trip through a 32-bit integer would saturate for
	   |x| >= 2^31, turn NaN into 0 and drop the sign of zero.
	   Instead x is rounded in the double domain: with the rounding
	   mode set to "towards plus infinity", adding copysign(2^52, x)
	   leaves no room for fraction bits, and subtracting the same
	   constant again is exact. */
	vmov r2, r3, d0			/* r3 = high word of x */
	lsr r0, r3, #20
	bic r0, r0, #0x800		/* r0 = biased exponent */
	movw r1, #1075			/* exponent bias (1023) + 52 */
	cmp r0, r1
	bhs 1f				/* |x| >= 2^52, Inf or NaN: return x */
	and r3, r3, #0x80000000		/* r3 = sign bit of x */
	orr r2, r3, #0x43000000
	orr r2, r2, #0x00300000		/* r2 = high word of copysign(2^52, x) */
	mov r0, #0
	vmov d1, r0, r2			/* d1 = copysign(2^52, x) */
	vmrs r1, fpscr			/* r1 = caller's FPSCR */
	bic r0, r1, #0x00c00000
	orr r0, r0, #0x00400000 /* Round towards Plus Infinity */
	vmsr fpscr, r0
	vadd.f64 d0, d0, d1		/* rounds x up to an integer */
	vsub.f64 d0, d0, d1		/* exact */
	vmsr fpscr, r1			/* restore rounding mode and status flags */
	/* A zero result comes out as +0.0; copy the sign of x back so
	   that inputs in (-1, -0] yield -0.0. */
	vmov r0, r1, d0
	orr r1, r1, r3
	vmov d0, r0, r1
1:
	bx lr
#elif defined(_X86_) || defined(__i386__)
	fldl 4(%esp)			/* st(0) = x */
	subl $8,%esp			/* (%esp) = modified CW, 4(%esp) = saved CW */
	fstcw 4(%esp) /* store fpu control word */
	/* We use here %edx although only the low 16 bits are defined.
	   But none of the operations should care and they are faster
	   than the 16 bit operations. */
	movl $0x0800,%edx /* round towards +oo */
	orl 4(%esp),%edx
	andl $0xfbff,%edx		/* clear bit 10 and the undefined upper half */
	movl %edx,(%esp)
	fldcw (%esp) /* load modified control word */
	frndint /* round */
	fldcw 4(%esp) /* restore original control word */
	addl $8,%esp
	ret
#endif