mirror of
git://git.musl-libc.org/musl
synced 2024-12-27 01:02:12 +00:00
optimize x86 feclearexcept: only use save/restore x87 fenv if needed
The x87 exception summary (ES) and stack fault (SF) flags may be spuriously cleared by feclearexcept using the fnclex instruction, but these flags are not observable through libc, so maintaining their state is not critical.
This commit is contained in:
parent
baba2630c9
commit
d8764bf840
@ -4,26 +4,41 @@
|
||||
.type feclearexcept,@function
|
||||
feclearexcept:
|
||||
mov 4(%esp),%ecx
|
||||
not %ecx
|
||||
fnstsw %ax
|
||||
# consider sse fenv as well if the cpu has XMM capability
|
||||
call 1f
|
||||
1: addl $__hwcap-1b,(%esp)
|
||||
pop %edx
|
||||
testl $0x02000000,(%edx)
|
||||
jz 2f
|
||||
# maintain exceptions in the sse mxcsr, clear x87 exceptions
|
||||
test %eax,%ecx
|
||||
jz 1f
|
||||
stmxcsr 4(%esp)
|
||||
and %ecx,4(%esp)
|
||||
ldmxcsr 4(%esp)
|
||||
1: test $0x3f,%ecx
|
||||
jnz 2f
|
||||
1: fnclex
|
||||
xor %eax,%eax
|
||||
fnclex
|
||||
1: push %edx
|
||||
stmxcsr (%esp)
|
||||
pop %edx
|
||||
and $0x3f,%eax
|
||||
or %eax,%edx
|
||||
test %edx,%ecx
|
||||
jz 1f
|
||||
not %ecx
|
||||
and %ecx,%edx
|
||||
push %edx
|
||||
ldmxcsr (%esp)
|
||||
pop %edx
|
||||
1: xor %eax,%eax
|
||||
ret
|
||||
2: fnstsw %ax
|
||||
# TODO: only load/store fenv if exceptions are not clear yet
|
||||
and %ecx,%eax
|
||||
# only do the expensive x87 fenv load/store when needed
|
||||
2: test %eax,%ecx
|
||||
jz 1b
|
||||
sub $32,%esp
|
||||
not %ecx
|
||||
and %ecx,%eax
|
||||
test $0x3f,%eax
|
||||
jz 1f
|
||||
fnclex
|
||||
jmp 1b
|
||||
1: sub $32,%esp
|
||||
fnstenv (%esp)
|
||||
mov %al,4(%esp)
|
||||
fldenv (%esp)
|
||||
|
@ -1,25 +1,21 @@
|
||||
.global feclearexcept
|
||||
.type feclearexcept,@function
|
||||
feclearexcept:
|
||||
# maintain exceptions in the sse mxcsr, clear x87 exceptions
|
||||
mov %edi,%ecx
|
||||
fnstsw %ax
|
||||
test %eax,%ecx
|
||||
jz 1f
|
||||
fnclex
|
||||
1: stmxcsr -8(%rsp)
|
||||
and $0x3f,%eax
|
||||
or %eax,-8(%rsp)
|
||||
test %ecx,-8(%rsp)
|
||||
jz 1f
|
||||
not %ecx
|
||||
stmxcsr -8(%rsp)
|
||||
and %ecx,-8(%rsp)
|
||||
ldmxcsr -8(%rsp)
|
||||
test $0x3f,%ecx
|
||||
jnz 2f
|
||||
1: fnclex
|
||||
xor %eax,%eax
|
||||
ret
|
||||
2: fnstsw %ax
|
||||
and %ecx,%eax
|
||||
jz 1b
|
||||
sub $32,%rsp
|
||||
fnstenv (%rsp)
|
||||
mov %al,4(%rsp)
|
||||
fldenv (%rsp)
|
||||
add $32,%rsp
|
||||
xor %eax,%eax
|
||||
1: xor %eax,%eax
|
||||
ret
|
||||
|
||||
.global feraiseexcept
|
||||
|
Loading…
Reference in New Issue
Block a user