math: add fp_arch.h with fp_barrier and fp_force_eval

C99 has ways to support fenv access, but compilers don't implement it
and assume nearest rounding mode and no fp status flag access. (gcc has
-frounding-math and then it does not assume nearest rounding mode, but
it still assumes the compiled code itself does not change the mode.
Even if the C99 mechanism was implemented it is not ideal: it requires
all code in the library to be compiled with FENV_ACCESS "on" to make it
usable in non-nearest rounding mode, but that limits optimizations more
than necessary.)

The math functions should give reasonable results in all rounding modes
(but the quality may be degraded in non-nearest rounding modes) and the
fp status flag settings should follow the spec, so fenv side-effects are
important and code transformations that break them should be prevented.

Unfortunately compilers don't give any help with this, the best we can
do is to add fp barriers to the code using volatile local variables
(they create a stack frame and undesirable memory accesses to it) or
inline asm (gcc specific, requires target specific fp reg constraints,
often creates unnecessary reg moves and multiple barriers are needed to
express that an operation has side-effects) or extern call (only useful
in tail-call position to avoid stack-frame creation and does not work
with lto).

We assume that in a math function if an operation depends on the input
and the output depends on it, then the operation will be evaluated at
runtime when the function is called, producing all the expected fenv
side-effects (this is not true in case of lto and in case the operation
is evaluated with excess precision that is not rounded away). So fp
barriers are needed (1) to prevent the move of an operation within a
function (in case it may be moved from an unevaluated code path into an
evaluated one or if it may be moved across a fenv access), (2) force the
evaluation of an operation for its side-effect when it has no input
dependency (may be constant folded) or (3) when its output is unused. I
believe that fp_barrier and fp_force_eval can take care of these and they
should not be needed in hot code paths.
This commit is contained in:
Szabolcs Nagy 2018-11-26 23:30:00 +00:00 committed by Rich Felker
parent f107d34e76
commit b50d315fd2
3 changed files with 90 additions and 6 deletions

25
arch/aarch64/fp_arch.h Normal file
View File

@ -0,0 +1,25 @@
/* AArch64 float barrier: returns x after passing it through an empty
   volatile asm statement.  The self-named macro marks this variant as
   defined so that #ifndef checks elsewhere can detect it. */
#define fp_barrierf fp_barrierf
static inline float fp_barrierf(float x)
{
/* No instructions are emitted; "+w"(x) declares x as both read and
   written by the asm, so the compiler has to treat the value as
   unknown afterwards. */
__asm__ __volatile__ ("" : "+w"(x));
return x;
}
/* AArch64 double barrier: returns x after passing it through an empty
   volatile asm statement.  The self-named macro marks this variant as
   defined so that #ifndef checks elsewhere can detect it. */
#define fp_barrier fp_barrier
static inline double fp_barrier(double x)
{
/* No instructions are emitted; "+w"(x) declares x as both read and
   written by the asm, so the compiler has to treat the value as
   unknown afterwards. */
__asm__ __volatile__ ("" : "+w"(x));
return x;
}
/* AArch64 float force-eval: consumes x in an empty volatile asm
   statement and returns nothing.  The self-named macro marks this
   variant as defined so that #ifndef checks elsewhere can detect it. */
#define fp_force_evalf fp_force_evalf
static inline void fp_force_evalf(float x)
{
/* No instructions are emitted; the asm names x as a read-write operand,
   so the value must be available when the statement is reached. */
__asm__ __volatile__ ("" : "+w"(x));
}
/* AArch64 double force-eval: consumes x in an empty volatile asm
   statement and returns nothing.  The self-named macro marks this
   variant as defined so that #ifndef checks elsewhere can detect it. */
#define fp_force_eval fp_force_eval
static inline void fp_force_eval(double x)
{
/* No instructions are emitted; the asm names x as a read-write operand,
   so the value must be available when the statement is reached. */
__asm__ __volatile__ ("" : "+w"(x));
}

0
arch/generic/fp_arch.h Normal file
View File

View File

@ -5,6 +5,7 @@
#include <float.h>
#include <math.h>
#include <endian.h>
#include "fp_arch.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
@ -58,16 +59,74 @@ union ldshape {
#error Unsupported long double representation
#endif
/* fp_barrier returns its input, but limits code transformations
as if it had a side-effect (e.g. observable io) and returned
an arbitrary value. */
#ifndef fp_barrierf
#define fp_barrierf fp_barrierf
/* Fallback float barrier, compiled only when no fp_barrierf macro is
   already defined.  The argument is written to a volatile local and
   the value read back from that local is returned. */
static inline float fp_barrierf(float x)
{
	volatile float slot;

	slot = x;
	return slot;
}
#endif
#ifndef fp_barrier
#define fp_barrier fp_barrier
/* Fallback double barrier, compiled only when no fp_barrier macro is
   already defined.  The argument is written to a volatile local and
   the value read back from that local is returned. */
static inline double fp_barrier(double x)
{
	volatile double slot;

	slot = x;
	return slot;
}
#endif
#ifndef fp_barrierl
#define fp_barrierl fp_barrierl
/* Fallback long double barrier, compiled only when no fp_barrierl macro
   is already defined.  The argument is written to a volatile local and
   the value read back from that local is returned. */
static inline long double fp_barrierl(long double x)
{
	volatile long double slot;

	slot = x;
	return slot;
}
#endif
/* fp_force_eval ensures that the input value is computed when that's
otherwise unused. To prevent the constant folding of the input
expression, an additional fp_barrier may be needed or a compilation
mode that does so (e.g. -frounding-math in gcc). Then it can be
used to evaluate an expression for its fenv side-effects only. */
#ifndef fp_force_evalf
#define fp_force_evalf fp_force_evalf
/* Fallback float force-eval, compiled only when no fp_force_evalf macro
   is already defined.  Stores the argument into a volatile local and
   discards it; nothing is returned. */
static inline void fp_force_evalf(float x)
{
	volatile float sink;

	sink = x;
}
#endif
#ifndef fp_force_eval
#define fp_force_eval fp_force_eval
/* Fallback double force-eval, compiled only when no fp_force_eval macro
   is already defined.  Stores the argument into a volatile local and
   discards it; nothing is returned. */
static inline void fp_force_eval(double x)
{
	volatile double sink;

	sink = x;
}
#endif
#ifndef fp_force_evall
#define fp_force_evall fp_force_evall
/* Fallback long double force-eval, compiled only when no fp_force_evall
   macro is already defined.  Stores the argument into a volatile local
   and discards it; nothing is returned. */
static inline void fp_force_evall(long double x)
{
	volatile long double sink;

	sink = x;
}
#endif
/* FORCE_EVAL(x) hands the floating-point expression x to the
   fp_force_eval variant chosen by sizeof(x): fp_force_evalf for a
   float-sized operand, fp_force_eval for a double-sized one and
   fp_force_evall for anything else.  The selector is a compile-time
   constant, so exactly one branch runs and x is evaluated exactly once
   at run time (sizeof does not evaluate its operand).  No separate
   volatile temporary is kept here: storing x and then also passing it
   to the helper would evaluate the argument twice. */
#define FORCE_EVAL(x) do {                        \
	if (sizeof(x) == sizeof(float)) {         \
		fp_force_evalf(x);                \
	} else if (sizeof(x) == sizeof(double)) { \
		fp_force_eval(x);                 \
	} else {                                  \
		fp_force_evall(x);                \
	}                                         \
} while(0)