x86_64 math asm, long double functions only

this has not been tested heavily, but it's known to at least assemble and run in basic usage cases. it's nearly identical to the corresponding i386 code, and thus expected to be just as correct or just as incorrect.
2012-03-20 23:29:24 -04:00 · 2012-03-20 23:29:24 -04:00 · 30df206cb0
parent 80949ccdc6
commit 30df206cb0
17 changed files with 239 additions and 0 deletions
--- a/src/math/x86_64/acosl.s
+++ b/src/math/x86_64/acosl.s
@ -0,0 +1,20 @@
+.global acosl	# long double acosl(long double x): x is read from 8(%rsp), result is left in st0
+.type acosl,@function
+acosl:	# computes 2 * atan2(1, sqrt((1+x)/(1-x)))
+	fldt 8(%rsp)	# st0 = x
+	fld1	# st0 = 1, st1 = x
+	fld %st(1)	# st0 = x, st1 = 1, st2 = x
+	fld1	# st0 = 1, st1 = x, st2 = 1, st3 = x
+	fsubp	# st0 = 1-x, st1 = 1, st2 = x  (GNU as AT&T form: st1 = st0 - st1, then pop)
+	fxch %st(2)	# st0 = x, st1 = 1, st2 = 1-x
+	faddp	# st0 = 1+x, st1 = 1-x
+	fdivp	# st0 = (1+x)/(1-x)  (GNU as AT&T form: st1 = st0 / st1, then pop)
+	fsqrt	# st0 = sqrt((1+x)/(1-x))
+	fld1	# st0 = 1, st1 = sqrt(...)
+	fxch %st(1)	# st0 = sqrt(...), st1 = 1
+	fpatan	# st0 = atan2(st1, st0) = atan2(1, sqrt((1+x)/(1-x)))
+	fld1	# NOTE(review): the fsubp/fdivp results above rely on GNU as's reversed AT&T operands - confirm before porting to another assembler
+	fld1
+	faddp	# st0 = 2, st1 = atan2 result
+	fmulp	# st0 = 2 * atan2(...), the return value
+	ret
--- a/src/math/x86_64/asinl.s
+++ b/src/math/x86_64/asinl.s
@ -0,0 +1,12 @@
+.global asinl	# long double asinl(long double x): computes atan2(x, sqrt(1 - x*x)); result in st0
+.type asinl,@function
+asinl:
+	fldt 8(%rsp)	# st0 = x
+1:	fld %st(0)	# st0 = x, st1 = x  (label 1 is not referenced anywhere in this file)
+	fmul %st(0)	# st0 = x*x, st1 = x
+	fld1	# st0 = 1, st1 = x*x, st2 = x
+	fsubp %st(1)	# st0 = 1 - x*x, st1 = x  (GNU as AT&T form: st1 = st0 - st1, then pop)
+	fsqrt	# st0 = sqrt(1 - x*x)
+	fpatan	# st0 = atan2(st1, st0) = atan2(x, sqrt(1 - x*x))
+	ret
+
--- a/src/math/x86_64/atan2l.s
+++ b/src/math/x86_64/atan2l.s
@ -0,0 +1,7 @@
+.global atan2l	# long double atan2l(long double y, long double x); result in st0
+.type atan2l,@function
+atan2l:
+	fldt 8(%rsp)	# st0 = y (first stack argument)
+	fldt 24(%rsp)	# st0 = x (second argument, 16 bytes further up), st1 = y
+	fpatan	# st0 = atan2(st1, st0) = atan2(y, x)
+	ret
--- a/src/math/x86_64/atanl.s
+++ b/src/math/x86_64/atanl.s
@ -0,0 +1,7 @@
+.global atanl	# long double atanl(long double x): computed as atan2(x, 1); result in st0
+.type atanl,@function
+atanl:
+	fldt 8(%rsp)	# st0 = x
+	fld1	# st0 = 1, st1 = x
+	fpatan	# st0 = atan2(st1, st0) = atan2(x, 1)
+	ret
--- a/src/math/x86_64/ceill.s
+++ b/src/math/x86_64/ceill.s
@ -0,0 +1 @@
+# see floorl.s
--- a/src/math/x86_64/exp2l.s
+++ b/src/math/x86_64/exp2l.s
@ -0,0 +1 @@
+# see expl.s
--- a/src/math/x86_64/expl.s
+++ b/src/math/x86_64/expl.s
@ -0,0 +1,85 @@
+.global expm1l	# long double expm1l(long double x): e^x - 1, evaluated as 2^(x*log2(e)) - 1; result in st0
+.type expm1l,@function
+expm1l:
+	fldt 8(%rsp)	# st0 = x
+1:	fldl2e	# st0 = log2(e), st1 = x  (label 1 here is not referenced anywhere in this file)
+	fmulp	# st0 = y = x * log2(e)
+	fld1	# st0 = 1, st1 = y
+	fld %st(1)	# st0 = y, st1 = 1, st2 = y
+	fabs	# st0 = |y|
+	fucom %st(1)	# compare |y| against 1
+	fnstsw %ax	# x87 status word -> %ax
+	fstp %st(0)	# drop |y|
+	fstp %st(0)	# drop 1; st0 = y again
+	sahf	# %ah -> CPU flags (C0 -> CF, C3 -> ZF)
+	ja 1f	# |y| > 1: outside the range handled by f2xm1 alone
+	f2xm1	# st0 = 2^y - 1 = e^x - 1
+	ret
+1:	push %rax	# value is never used; presumably 8 bytes of stack padding around the call - TODO confirm
+	call 1f	# the next "1:" label is inside exp2l, just past its argument load: returns st0 = 2^y
+	pop %rax	# undo the push
+	fld1	# st0 = 1, st1 = 2^y
+	fsubrp	# st0 = 2^y - 1  (GNU as AT&T form: st1 = st1 - st0, then pop)
+	ret
+
+.global expl	# long double expl(long double x): e^x evaluated as 2^(x * log2(e))
+.type expl,@function
+expl:
+	fldt 8(%rsp)	# st0 = x
+	fldl2e	# st0 = log2(e), st1 = x
+	fmulp	# st0 = x * log2(e)
+	jmp 1f	# continue at the next "1:" label, which is inside exp2l just past its argument load
+
+.global exp2l	# long double exp2l(long double x): 2^x; result in st0
+.type exp2l,@function
+exp2l:
+	fldt 8(%rsp)	# st0 = x
+1:	mov $0x467ff000,%eax	# shared entry (expl and expm1l arrive here with st0 = y); IEEE single bit pattern of 16380.0
+	mov %eax,-16(%rsp)	# scratch memory below %rsp
+	mov $0x80000000,%eax
+	mov %eax,-20(%rsp)	# high dword of an 80-bit significand equal to 1.0
+	xor %eax,%eax
+	mov %eax,-24(%rsp)	# low dword of that significand = 0
+	flds -16(%rsp)	# st0 = 16380.0, st1 = y
+	fld %st(1)	# st0 = y, st1 = 16380.0, st2 = y
+	fabs	# st0 = |y|
+	fucom %st(1)	# compare |y| against 16380.0
+	fnstsw	# no operand written; the sahf below expects the status word in %ax
+	fstp %st(0)	# drop |y|
+	fstp %st(0)	# drop the threshold; st0 = y again
+	sahf	# %ah -> CPU flags
+	ja 2f	# |y| above the threshold: take the fscale-based path
+	fld %st(0)	# st0 = y, st1 = y
+	fistpl -16(%rsp)	# n = y converted to a 32-bit integer (current rounding mode), then pop
+	fildl -16(%rsp)	# st0 = n, st1 = y
+	fxch %st(1)	# st0 = y, st1 = n
+	fsub %st(1)	# st0 = y - n
+	mov $0x3fff,%eax	# exponent bias of the 80-bit format
+	add %eax,-16(%rsp)	# -16(%rsp) = n + bias, i.e. the exponent field of 2^n
+	f2xm1	# st0 = 2^(y-n) - 1
+	fld1
+	faddp	# st0 = 2^(y-n), st1 = n
+	fldt -24(%rsp)	# st0 = 2^n assembled from the three dwords written above
+	fmulp	# st0 = 2^(y-n) * 2^n = 2^y, st1 = n
+	fstp %st(1)	# drop n; result stays in st0
+	ret
+
+2:	fld %st(0)	# st0 = y, st1 = y
+	fstpt -24(%rsp)	# spill a copy of y to memory to inspect its exponent
+	mov -15(%rsp),%ah	# top byte of y: sign bit plus the high 7 exponent bits
+	and $0x7f,%ah	# discard the sign
+	cmp $0x7f,%ah
+	jne 1f	# high exponent bits not all ones: use y unchanged
+	decb -15(%rsp)	# otherwise lower the exponent field by 0x100 (presumably so inf/huge inputs stay finite below - TODO confirm)
+	fstp %st(0)	# discard the old y
+	fldt -24(%rsp)	# reload the adjusted value as y
+1:	fld %st(0)	# st0 = y, st1 = y
+	frndint	# st0 = n = y rounded to an integer (current rounding mode)
+	fxch %st(1)	# st0 = y, st1 = n
+	fsub %st(1)	# st0 = y - n
+	f2xm1	# st0 = 2^(y-n) - 1
+	fld1
+	faddp	# st0 = 2^(y-n), st1 = n
+	fscale	# st0 = 2^(y-n) * 2^n
+	fstp %st(1)	# drop n; result stays in st0
+	ret
--- a/src/math/x86_64/expm1l.s
+++ b/src/math/x86_64/expm1l.s
@ -0,0 +1 @@
+# see expl.s
--- a/src/math/x86_64/floorl.s
+++ b/src/math/x86_64/floorl.s
@ -0,0 +1,27 @@
+.global floorl	# long double floorl(long double x): frndint with rounding control forced to "down"
+.type floorl,@function
+floorl:
+	fldt 8(%rsp)	# st0 = x
+1:	mov $0x7,%al	# new high byte of the x87 control word: RC = 01 (toward -inf), PC = 11
+1:	fstcw 8(%rsp)	# shared tail: ceill/truncl jump here with their own %al; saves the control word over the argument slot
+	mov 9(%rsp),%ah	# %ah = caller's original high byte
+	mov %al,9(%rsp)	# install the requested rounding mode
+	fldcw 8(%rsp)
+	frndint	# st0 = x rounded to an integer in that mode
+	mov %ah,9(%rsp)	# put the original high byte back
+	fldcw 8(%rsp)	# restore the caller's control word
+	ret
+
+.global ceill	# long double ceill(long double x): frndint with rounding control forced to "up"
+.type ceill,@function
+ceill:
+	fldt 8(%rsp)	# st0 = x
+	mov $0xb,%al	# control-word high byte: RC = 10 (toward +inf), PC = 11
+	jmp 1b	# nearest preceding "1:" label, the fstcw tail inside floorl
+
+.global truncl	# long double truncl(long double x): frndint with rounding control forced to "toward zero"
+.type truncl,@function
+truncl:
+	fldt 8(%rsp)	# st0 = x
+	mov $0xf,%al	# control-word high byte: RC = 11 (truncate), PC = 11
+	jmp 1b	# nearest preceding "1:" label, the fstcw tail inside floorl
--- a/src/math/x86_64/llrintl.s
+++ b/src/math/x86_64/llrintl.s
@ -0,0 +1,7 @@
+.global llrintl	# long long llrintl(long double x): round to a 64-bit integer using the current rounding mode
+.type llrintl,@function
+llrintl:
+	fldt 8(%rsp)	# st0 = x
+	fistpq 8(%rsp)	# store st0 as a 64-bit integer over the argument slot and pop
+	mov 8(%rsp),%rax	# return value
+	ret
--- a/src/math/x86_64/log10l.s
+++ b/src/math/x86_64/log10l.s
@ -0,0 +1,7 @@
+.global log10l	# long double log10l(long double x): log10(2) * log2(x); result in st0
+.type log10l,@function
+log10l:
+	fldlg2	# st0 = log10(2)
+	fldt 8(%rsp)	# st0 = x, st1 = log10(2)
+	fyl2x	# st0 = st1 * log2(st0), with one pop
+	ret
--- a/src/math/x86_64/log1pl.s
+++ b/src/math/x86_64/log1pl.s
@ -0,0 +1,15 @@
+.global log1pl	# long double log1pl(long double x): ln(1 + x); result in st0
+.type log1pl,@function
+log1pl:
+	mov 14(%rsp),%eax	# bytes 6..9 of x: top 16 significand bits (low half) and sign+exponent (high half)
+	fldln2	# st0 = ln(2)
+	and $0x7fffffff,%eax	# clear the sign bit, leaving a key that orders by |x|
+	fldt 8(%rsp)	# st0 = x, st1 = ln(2)
+	cmp $0x3ffd9400,%eax	# threshold key: exponent 0x3ffd with significand top bits 0x9400, i.e. 0.2890625
+	ja 1f	# |x| above the threshold: use the plain log path
+	fyl2xp1	# st0 = ln(2) * log2(x + 1), computed without forming x + 1
+	ret
+1:	fld1	# st0 = 1, st1 = x, st2 = ln(2)
+	faddp	# st0 = x + 1, st1 = ln(2)
+	fyl2x	# st0 = ln(2) * log2(x + 1)
+	ret
--- a/src/math/x86_64/log2l.s
+++ b/src/math/x86_64/log2l.s
@ -0,0 +1,7 @@
+.global log2l	# long double log2l(long double x): 1 * log2(x); result in st0
+.type log2l,@function
+log2l:
+	fld1	# st0 = 1 (the multiplier fyl2x expects in st1)
+	fldt 8(%rsp)	# st0 = x, st1 = 1
+	fyl2x	# st0 = st1 * log2(st0), with one pop
+	ret
--- a/src/math/x86_64/logl.s
+++ b/src/math/x86_64/logl.s
@ -0,0 +1,7 @@
+.global logl	# long double logl(long double x): ln(2) * log2(x); result in st0
+.type logl,@function
+logl:
+	fldln2	# st0 = ln(2)
+	fldt 8(%rsp)	# st0 = x, st1 = ln(2)
+	fyl2x	# st0 = st1 * log2(st0), with one pop
+	ret
--- a/src/math/x86_64/lrintl.s
+++ b/src/math/x86_64/lrintl.s
@ -0,0 +1,7 @@
+.global lrintl	# long lrintl(long double x): round to long using the current rounding mode
+.type lrintl,@function
+lrintl:
+	fldt 8(%rsp)	# st0 = x
+	fistpq 8(%rsp)	# long is 64 bits on x86_64: store a 64-bit integer over the argument slot and pop
+	mov 8(%rsp),%rax	# load all 64 bits; a 32-bit load into %eax would zero-extend and corrupt negative results
+	ret
--- a/src/math/x86_64/modfl.s
+++ b/src/math/x86_64/modfl.s
@ -0,0 +1,27 @@
+.global modfl	# long double modfl(long double x, long double *iptr): x at 8(%rsp), iptr in %rdi
+.type modfl,@function
+modfl:	# returns the fractional part in st0 and stores the integral part to *iptr
+	fldt 8(%rsp)	# st0 = x
+	fxam	# classify x into condition codes C3/C2/C0
+	fnstsw %ax
+	sahf	# C0 -> CF, C2 -> PF, C3 -> ZF
+	jnp 1f	# C2 clear (zero, NaN, ...): general path
+	jc 2f	# C2 and C0 both set: x is infinite
+1:	fld1	# st0 = 1, st1 = x
+	fld %st(1)	# st0 = x, st1 = 1, st2 = x
+1:	fprem	# st0 = partial remainder of st0 / 1, keeping the sign of x
+	fnstsw %ax
+	sahf
+	jp 1b	# C2 set: reduction incomplete, run fprem again
+	fstp %st(1)	# st0 = frac, st1 = x
+	fsubr %st(0),%st(1)	# st1 = x - frac  (GNU as AT&T form: st1 = st1 - st0)
+	fxch %st(1)	# st0 = integral part, st1 = frac
+	fstpt (%rdi)	# *iptr = integral part; st0 = frac, the return value
+	mov 17(%rsp),%al	# byte 9 of x: sign bit plus high exponent bits
+	and $0x80,%al	# keep only the sign of x
+	andb $0x7f,9(%rdi)	# x - frac gives +0 for -1 < x <= -0 (or -0 under round-down for small x > 0) ...
+	or %al,9(%rdi)	# ... so force the sign of *iptr to match x, as modf requires
+	ret
+
+2:	fstpt (%rdi)	# infinite x: *iptr = x
+	mov 6(%rdi),%eax	# top dword of the stored value; bit 31 is its sign
+	and $0x80000000,%eax	# sign bit only = bit pattern of float +0.0 or -0.0
+	mov %eax,8(%rsp)	# reuse the argument slot as scratch
+	flds 8(%rsp)	# return a zero carrying the sign of x
+	ret
--- a/src/math/x86_64/truncl.s
+++ b/src/math/x86_64/truncl.s
@ -0,0 +1 @@
+# see floorl.s