mirror of git://git.musl-libc.org/musl
math: fix remaining old long double code (erfl, fmal, lgammal, scalbnl)
in lgammal don't handle 1 and 2 specially, in fma use the new ldshape union instead of ld80 one.
This commit is contained in:
parent
535104ab6a
commit
34660d73bd
|
@ -253,8 +253,8 @@ static long double erfc1(long double x)
|
|||
|
||||
static long double erfc2(uint32_t ix, long double x)
|
||||
{
|
||||
union ldshape u;
|
||||
long double s,z,R,S;
|
||||
uint32_t i0,i1;
|
||||
|
||||
if (ix < 0x3fffa000) /* 0.84375 <= |x| < 1.25 */
|
||||
return erfc1(x);
|
||||
|
@ -288,28 +288,22 @@ static long double erfc2(uint32_t ix, long double x)
|
|||
S = sc[0] + s * (sc[1] + s * (sc[2] + s * (sc[3] +
|
||||
s * (sc[4] + s))));
|
||||
}
|
||||
z = x;
|
||||
GET_LDOUBLE_WORDS(ix, i0, i1, z);
|
||||
i1 = 0;
|
||||
i0 &= 0xffffff00;
|
||||
SET_LDOUBLE_WORDS(z, ix, i0, i1);
|
||||
u.f = x;
|
||||
u.i.m &= -1ULL << 40;
|
||||
z = u.f;
|
||||
return expl(-z*z - 0.5625) * expl((z - x) * (z + x) + R / S) / x;
|
||||
}
|
||||
|
||||
long double erfl(long double x)
|
||||
{
|
||||
long double r, s, z, y;
|
||||
uint32_t i0, i1, ix;
|
||||
int sign;
|
||||
union ldshape u = {x};
|
||||
uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48;
|
||||
int sign = u.i.se >> 15;
|
||||
|
||||
GET_LDOUBLE_WORDS(ix, i0, i1, x);
|
||||
sign = ix >> 15;
|
||||
ix &= 0x7fff;
|
||||
if (ix == 0x7fff) {
|
||||
if (ix >= 0x7fff0000)
|
||||
/* erf(nan)=nan, erf(+-inf)=+-1 */
|
||||
return 1 - 2*sign + 1/x;
|
||||
}
|
||||
ix = (ix << 16) | (i0 >> 16);
|
||||
if (ix < 0x3ffed800) { /* |x| < 0.84375 */
|
||||
if (ix < 0x3fde8000) { /* |x| < 2**-33 */
|
||||
return 0.125 * (8 * x + efx8 * x); /* avoid underflow */
|
||||
|
@ -332,17 +326,13 @@ long double erfl(long double x)
|
|||
long double erfcl(long double x)
|
||||
{
|
||||
long double r, s, z, y;
|
||||
uint32_t i0, i1, ix;
|
||||
int sign;
|
||||
union ldshape u = {x};
|
||||
uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48;
|
||||
int sign = u.i.se >> 15;
|
||||
|
||||
GET_LDOUBLE_WORDS(ix, i0, i1, x);
|
||||
sign = ix>>15;
|
||||
ix &= 0x7fff;
|
||||
if (ix == 0x7fff)
|
||||
if (ix >= 0x7fff0000)
|
||||
/* erfc(nan) = nan, erfc(+-inf) = 0,2 */
|
||||
return 2*sign + 1/x;
|
||||
|
||||
ix = (ix << 16) | (i0 >> 16);
|
||||
if (ix < 0x3ffed800) { /* |x| < 0.84375 */
|
||||
if (ix < 0x3fbe0000) /* |x| < 2**-65 */
|
||||
return 1.0 - x;
|
||||
|
@ -358,6 +348,7 @@ long double erfcl(long double x)
|
|||
}
|
||||
if (ix < 0x4005d600) /* |x| < 107 */
|
||||
return sign ? 2 - erfc2(ix,x) : erfc2(ix,x);
|
||||
return sign ? 2 - 0x1p-16382L : 0x1p-16382L*0x1p-16382L;
|
||||
y = 0x1p-16382L;
|
||||
return sign ? 2 - y : y*y;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -2,16 +2,6 @@
|
|||
#include "libm.h"
|
||||
|
||||
#if LDBL_MANT_DIG==64 && LDBL_MAX_EXP==16384
|
||||
union ld80 {
|
||||
long double x;
|
||||
struct {
|
||||
uint64_t m;
|
||||
uint16_t e : 15;
|
||||
uint16_t s : 1;
|
||||
uint16_t pad;
|
||||
} bits;
|
||||
};
|
||||
|
||||
/* exact add, assumes exponent_x >= exponent_y */
|
||||
static void add(long double *hi, long double *lo, long double x, long double y)
|
||||
{
|
||||
|
@ -45,25 +35,25 @@ return an adjusted hi so that rounding it to double (or less) precision is corre
|
|||
*/
|
||||
static long double adjust(long double hi, long double lo)
|
||||
{
|
||||
union ld80 uhi, ulo;
|
||||
union ldshape uhi, ulo;
|
||||
|
||||
if (lo == 0)
|
||||
return hi;
|
||||
uhi.x = hi;
|
||||
if (uhi.bits.m & 0x3ff)
|
||||
uhi.f = hi;
|
||||
if (uhi.i.m & 0x3ff)
|
||||
return hi;
|
||||
ulo.x = lo;
|
||||
if (uhi.bits.s == ulo.bits.s)
|
||||
uhi.bits.m++;
|
||||
ulo.f = lo;
|
||||
if ((uhi.i.se & 0x8000) == (ulo.i.se & 0x8000))
|
||||
uhi.i.m++;
|
||||
else {
|
||||
uhi.bits.m--;
|
||||
/* handle underflow and take care of ld80 implicit msb */
|
||||
if (uhi.bits.m == (uint64_t)-1/2) {
|
||||
uhi.bits.m *= 2;
|
||||
uhi.bits.e--;
|
||||
if (uhi.i.m << 1 == 0) {
|
||||
uhi.i.m = 0;
|
||||
uhi.i.se--;
|
||||
}
|
||||
uhi.i.m--;
|
||||
}
|
||||
return uhi.x;
|
||||
return uhi.f;
|
||||
}
|
||||
|
||||
/* adjusted add so the result is correct when rounded to double (or less) precision */
|
||||
|
@ -82,9 +72,9 @@ static long double dmul(long double x, long double y)
|
|||
|
||||
static int getexp(long double x)
|
||||
{
|
||||
union ld80 u;
|
||||
u.x = x;
|
||||
return u.bits.e;
|
||||
union ldshape u;
|
||||
u.f = x;
|
||||
return u.i.se & 0x7fff;
|
||||
}
|
||||
|
||||
double fma(double x, double y, double z)
|
||||
|
|
|
@ -34,6 +34,13 @@ long double fmal(long double x, long double y, long double z)
|
|||
}
|
||||
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
|
||||
#include <fenv.h>
|
||||
#if LDBL_MANT_DIG == 64
|
||||
#define LASTBIT(u) (u.i.m & 1)
|
||||
#define SPLIT (0x1p32L + 1)
|
||||
#elif LDBL_MANT_DIG == 113
|
||||
#define LASTBIT(u) (u.i.lo & 1)
|
||||
#define SPLIT (0x1p57L + 1)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* A struct dd represents a floating-point number with twice the precision
|
||||
|
@ -75,12 +82,12 @@ static inline struct dd dd_add(long double a, long double b)
|
|||
static inline long double add_adjusted(long double a, long double b)
|
||||
{
|
||||
struct dd sum;
|
||||
union IEEEl2bits u;
|
||||
union ldshape u;
|
||||
|
||||
sum = dd_add(a, b);
|
||||
if (sum.lo != 0) {
|
||||
u.e = sum.hi;
|
||||
if ((u.bits.manl & 1) == 0)
|
||||
u.f = sum.hi;
|
||||
if (!LASTBIT(u))
|
||||
sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
|
||||
}
|
||||
return (sum.hi);
|
||||
|
@ -95,7 +102,7 @@ static inline long double add_and_denormalize(long double a, long double b, int
|
|||
{
|
||||
struct dd sum;
|
||||
int bits_lost;
|
||||
union IEEEl2bits u;
|
||||
union ldshape u;
|
||||
|
||||
sum = dd_add(a, b);
|
||||
|
||||
|
@ -110,9 +117,9 @@ static inline long double add_and_denormalize(long double a, long double b, int
|
|||
* break the ties manually.
|
||||
*/
|
||||
if (sum.lo != 0) {
|
||||
u.e = sum.hi;
|
||||
bits_lost = -u.bits.exp - scale + 1;
|
||||
if (bits_lost != 1 ^ (int)(u.bits.manl & 1))
|
||||
u.f = sum.hi;
|
||||
bits_lost = -u.i.se - scale + 1;
|
||||
if ((bits_lost != 1) ^ LASTBIT(u))
|
||||
sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
|
||||
}
|
||||
return scalbnl(sum.hi, scale);
|
||||
|
@ -125,20 +132,15 @@ static inline long double add_and_denormalize(long double a, long double b, int
|
|||
*/
|
||||
static inline struct dd dd_mul(long double a, long double b)
|
||||
{
|
||||
#if LDBL_MANT_DIG == 64
|
||||
static const long double split = 0x1p32L + 1.0;
|
||||
#elif LDBL_MANT_DIG == 113
|
||||
static const long double split = 0x1p57L + 1.0;
|
||||
#endif
|
||||
struct dd ret;
|
||||
long double ha, hb, la, lb, p, q;
|
||||
|
||||
p = a * split;
|
||||
p = a * SPLIT;
|
||||
ha = a - p;
|
||||
ha += p;
|
||||
la = a - ha;
|
||||
|
||||
p = b * split;
|
||||
p = b * SPLIT;
|
||||
hb = b - p;
|
||||
hb += p;
|
||||
lb = b - hb;
|
||||
|
|
|
@ -202,13 +202,11 @@ w7 = 4.885026142432270781165E-3L;
|
|||
|
||||
static long double sin_pi(long double x)
|
||||
{
|
||||
union ldshape u = {x};
|
||||
uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48;
|
||||
long double y, z;
|
||||
int n, ix;
|
||||
uint32_t se, i0, i1;
|
||||
int n;
|
||||
|
||||
GET_LDOUBLE_WORDS(se, i0, i1, x);
|
||||
ix = se & 0x7fff;
|
||||
ix = (ix << 16) | (i0 >> 16);
|
||||
if (ix < 0x3ffd8000) /* 0.25 */
|
||||
return sinl(pi * x);
|
||||
y = -x; /* x is assume negative */
|
||||
|
@ -229,8 +227,8 @@ static long double sin_pi(long double x)
|
|||
} else {
|
||||
if (ix < 0x403e8000) /* 2^63 */
|
||||
z = y + two63; /* exact */
|
||||
GET_LDOUBLE_WORDS(se, i0, i1, z);
|
||||
n = i1 & 1;
|
||||
u.f = z;
|
||||
n = u.i.m & 1;
|
||||
y = n;
|
||||
n <<= 2;
|
||||
}
|
||||
|
@ -261,33 +259,28 @@ static long double sin_pi(long double x)
|
|||
|
||||
long double __lgammal_r(long double x, int *sg) {
|
||||
long double t, y, z, nadj, p, p1, p2, q, r, w;
|
||||
int i, ix;
|
||||
uint32_t se, i0, i1;
|
||||
union ldshape u = {x};
|
||||
uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48;
|
||||
int sign = u.i.se >> 15;
|
||||
int i;
|
||||
|
||||
*sg = 1;
|
||||
GET_LDOUBLE_WORDS(se, i0, i1, x);
|
||||
ix = se & 0x7fff;
|
||||
|
||||
if ((ix | i0 | i1) == 0) {
|
||||
if (se & 0x8000)
|
||||
*sg = -1;
|
||||
return 1.0 / fabsl(x);
|
||||
}
|
||||
|
||||
ix = (ix << 16) | (i0 >> 16);
|
||||
|
||||
/* purge off +-inf, NaN, +-0, and negative arguments */
|
||||
if (ix >= 0x7fff0000)
|
||||
return x * x;
|
||||
|
||||
if (x == 0) {
|
||||
*sg -= 2*sign;
|
||||
return 1.0 / fabsl(x);
|
||||
}
|
||||
if (ix < 0x3fc08000) { /* |x|<2**-63, return -log(|x|) */
|
||||
if (se & 0x8000) {
|
||||
if (sign) {
|
||||
*sg = -1;
|
||||
return -logl(-x);
|
||||
}
|
||||
return -logl(x);
|
||||
}
|
||||
if (se & 0x8000) {
|
||||
if (sign) {
|
||||
t = sin_pi (x);
|
||||
if (t == 0.0)
|
||||
return 1.0 / fabsl(t); /* -integer */
|
||||
|
@ -297,11 +290,7 @@ long double __lgammal_r(long double x, int *sg) {
|
|||
x = -x;
|
||||
}
|
||||
|
||||
/* purge off 1 and 2 */
|
||||
if ((((ix - 0x3fff8000) | i0 | i1) == 0) ||
|
||||
(((ix - 0x40008000) | i0 | i1) == 0))
|
||||
r = 0;
|
||||
else if (ix < 0x40008000) { /* x < 2.0 */
|
||||
if (ix < 0x40008000) { /* x < 2.0 */
|
||||
if (ix <= 0x3ffee666) { /* 8.99993896484375e-1 */
|
||||
/* lgamma(x) = lgamma(x+1) - log(x) */
|
||||
r = -logl(x);
|
||||
|
@ -380,7 +369,7 @@ long double __lgammal_r(long double x, int *sg) {
|
|||
r = (x - 0.5) * (t - 1.0) + w;
|
||||
} else /* 2**66 <= x <= inf */
|
||||
r = x * (logl(x) - 1.0);
|
||||
if (se & 0x8000)
|
||||
if (sign)
|
||||
r = nadj - r;
|
||||
return r;
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ long double scalbnl(long double x, int n)
|
|||
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
|
||||
long double scalbnl(long double x, int n)
|
||||
{
|
||||
union IEEEl2bits scale;
|
||||
union ldshape u;
|
||||
|
||||
if (n > 16383) {
|
||||
x *= 0x1p16383L;
|
||||
|
@ -29,8 +29,8 @@ long double scalbnl(long double x, int n)
|
|||
n = -16382;
|
||||
}
|
||||
}
|
||||
scale.e = 1.0;
|
||||
scale.bits.exp = 0x3fff + n;
|
||||
return x * scale.e;
|
||||
u.f = 1.0;
|
||||
u.i.se = 0x3fff + n;
|
||||
return x * u.f;
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue