optimize x86 feclearexcept: only use save/restore x87 fenv if needed

the x87 exception summary (ES) and stack fault (SF) flags may be
spuriously cleared when feclearexcept uses the fnclex instruction,
but these flags are not observable through libc, so maintaining
their state is not critical.
This commit is contained in:
Szabolcs Nagy 2013-08-18 15:34:07 +00:00
parent baba2630c9
commit d8764bf840
2 changed files with 38 additions and 27 deletions

View File

@ -4,26 +4,41 @@
.type feclearexcept,@function
feclearexcept:
mov 4(%esp),%ecx
not %ecx
fnstsw %ax
# consider sse fenv as well if the cpu has XMM capability
call 1f
1: addl $__hwcap-1b,(%esp)
pop %edx
testl $0x02000000,(%edx)
jz 2f
# maintain exceptions in the sse mxcsr, clear x87 exceptions
test %eax,%ecx
jz 1f
stmxcsr 4(%esp)
and %ecx,4(%esp)
ldmxcsr 4(%esp)
1: test $0x3f,%ecx
jnz 2f
1: fnclex
xor %eax,%eax
fnclex
1: push %edx
stmxcsr (%esp)
pop %edx
and $0x3f,%eax
or %eax,%edx
test %edx,%ecx
jz 1f
not %ecx
and %ecx,%edx
push %edx
ldmxcsr (%esp)
pop %edx
1: xor %eax,%eax
ret
2: fnstsw %ax
# TODO: only load/store fenv if exceptions arent clear yet
and %ecx,%eax
# only do the expensive x87 fenv load/store when needed
2: test %eax,%ecx
jz 1b
sub $32,%esp
not %ecx
and %ecx,%eax
test $0x3f,%eax
jz 1f
fnclex
jmp 1b
1: sub $32,%esp
fnstenv (%esp)
mov %al,4(%esp)
fldenv (%esp)

View File

@ -1,25 +1,21 @@
.global feclearexcept
.type feclearexcept,@function
feclearexcept:
# maintain exceptions in the sse mxcsr, clear x87 exceptions
mov %edi,%ecx
fnstsw %ax
test %eax,%ecx
jz 1f
fnclex
1: stmxcsr -8(%rsp)
and $0x3f,%eax
or %eax,-8(%rsp)
test %ecx,-8(%rsp)
jz 1f
not %ecx
stmxcsr -8(%rsp)
and %ecx,-8(%rsp)
ldmxcsr -8(%rsp)
test $0x3f,%ecx
jnz 2f
1: fnclex
xor %eax,%eax
ret
2: fnstsw %ax
and %ecx,%eax
jz 1b
sub $32,%rsp
fnstenv (%rsp)
mov %al,4(%rsp)
fldenv (%rsp)
add $32,%rsp
xor %eax,%eax
1: xor %eax,%eax
ret
.global feraiseexcept