math: lgamma cleanup (simpler sin(pi*x) for the negative case)

* simplify sin_pi(x) (we don't care about inexact here: the result is
  inexact anyway, and x is not small enough to underflow)
* in lgammal add the previously removed special case for x==1 and
  x==2 (to fix the sign of zero in downward rounding mode)
* only define lgammal on supported long double platforms
* change tgamma so the generated code is a bit smaller
This commit is contained in:
Szabolcs Nagy 2013-11-21 01:01:57 +00:00
parent 326e5c2e27
commit ebbaf2180e
4 changed files with 112 additions and 204 deletions

View File

@ -82,7 +82,6 @@
#include "libc.h" #include "libc.h"
static const double static const double
two52= 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */ a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */ a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
@ -147,91 +146,62 @@ w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */ w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
/* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
static double sin_pi(double x) static double sin_pi(double x)
{ {
double y,z; int n;
int n,ix;
GET_HIGH_WORD(ix, x); /* spurious inexact if odd int */
ix &= 0x7fffffff; x = 2.0*(x*0.5 - floor(x*0.5)); /* x mod 2.0 */
if (ix < 0x3fd00000) n = (int)(x*4.0);
return __sin(pi*x, 0.0, 0); n = (n+1)/2;
x -= n*0.5f;
x *= pi;
y = -x; /* negative x is assumed */
/*
* argument reduction, make sure inexact flag not raised if input
* is an integer
*/
z = floor(y);
if (z != y) { /* inexact anyway */
y *= 0.5;
y = 2.0*(y - floor(y)); /* y = |x| mod 2.0 */
n = (int)(y*4.0);
} else {
if (ix >= 0x43400000) {
y = 0.0; /* y must be even */
n = 0;
} else {
if (ix < 0x43300000)
z = y + two52; /* exact */
GET_LOW_WORD(n, z);
n &= 1;
y = n;
n <<= 2;
}
}
switch (n) { switch (n) {
case 0: y = __sin(pi*y, 0.0, 0); break; default: /* case 4: */
case 1: case 0: return __sin(x, 0.0, 0);
case 2: y = __cos(pi*(0.5-y), 0.0); break; case 1: return __cos(x, 0.0);
case 3: case 2: return __sin(-x, 0.0, 0);
case 4: y = __sin(pi*(1.0-y), 0.0, 0); break; case 3: return -__cos(x, 0.0);
case 5:
case 6: y = -__cos(pi*(y-1.5), 0.0); break;
default: y = __sin(pi*(y-2.0), 0.0, 0); break;
} }
return -y;
} }
double __lgamma_r(double x, int *signgamp) double __lgamma_r(double x, int *signgamp)
{ {
double t,y,z,nadj,p,p1,p2,p3,q,r,w; union {double f; uint64_t i;} u = {x};
int32_t hx; double_t t,y,z,nadj,p,p1,p2,p3,q,r,w;
int i,lx,ix; uint32_t ix;
int sign,i;
EXTRACT_WORDS(hx, lx, x);
/* purge off +-inf, NaN, +-0, tiny and negative arguments */ /* purge off +-inf, NaN, +-0, tiny and negative arguments */
*signgamp = 1; *signgamp = 1;
ix = hx & 0x7fffffff; sign = u.i>>63;
ix = u.i>>32 & 0x7fffffff;
if (ix >= 0x7ff00000) if (ix >= 0x7ff00000)
return x*x; return x*x;
if ((ix|lx) == 0) if (ix < (0x3ff-70)<<20) { /* |x|<2**-70, return -log(|x|) */
return 1.0/0.0; if(sign) {
if (ix < 0x3b900000) { /* |x|<2**-70, return -log(|x|) */ x = -x;
if(hx < 0) {
*signgamp = -1; *signgamp = -1;
return -log(-x);
} }
return -log(x); return -log(x);
} }
if (hx < 0) { if (sign) {
if (ix >= 0x43300000) /* |x|>=2**52, must be -integer */ x = -x;
return 1.0/0.0;
t = sin_pi(x); t = sin_pi(x);
if (t == 0.0) /* -integer */ if (t == 0.0) /* -integer */
return 1.0/0.0; return 1.0/(x-x);
nadj = log(pi/fabs(t*x)); if (t > 0.0)
if (t < 0.0)
*signgamp = -1; *signgamp = -1;
x = -x; else
t = -t;
nadj = log(pi/(t*x));
} }
/* purge off 1 and 2 */ /* purge off 1 and 2 */
if (((ix - 0x3ff00000)|lx) == 0 || ((ix - 0x40000000)|lx) == 0) if ((ix == 0x3ff00000 || ix == 0x40000000) && (uint32_t)u.i == 0)
r = 0; r = 0;
/* for x < 2.0 */ /* for x < 2.0 */
else if (ix < 0x40000000) { else if (ix < 0x40000000) {
@ -306,7 +276,7 @@ double __lgamma_r(double x, int *signgamp)
r = (x-0.5)*(t-1.0)+w; r = (x-0.5)*(t-1.0)+w;
} else /* 2**58 <= x <= inf */ } else /* 2**58 <= x <= inf */
r = x*(log(x)-1.0); r = x*(log(x)-1.0);
if (hx < 0) if (sign)
r = nadj - r; r = nadj - r;
return r; return r;
} }

View File

@ -17,7 +17,6 @@
#include "libc.h" #include "libc.h"
static const float static const float
two23= 8.3886080000e+06, /* 0x4b000000 */
pi = 3.1415927410e+00, /* 0x40490fdb */ pi = 3.1415927410e+00, /* 0x40490fdb */
a0 = 7.7215664089e-02, /* 0x3d9e233f */ a0 = 7.7215664089e-02, /* 0x3d9e233f */
a1 = 3.2246702909e-01, /* 0x3ea51a66 */ a1 = 3.2246702909e-01, /* 0x3ea51a66 */
@ -82,87 +81,58 @@ w4 = -5.9518753551e-04, /* 0xba1c065c */
w5 = 8.3633989561e-04, /* 0x3a5b3dd2 */ w5 = 8.3633989561e-04, /* 0x3a5b3dd2 */
w6 = -1.6309292987e-03; /* 0xbad5c4e8 */ w6 = -1.6309292987e-03; /* 0xbad5c4e8 */
static float sin_pif(float x) /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
static float sin_pi(float x)
{ {
float y,z; double_t y;
int n,ix; int n;
GET_FLOAT_WORD(ix, x); /* spurious inexact if odd int */
ix &= 0x7fffffff; x = 2*(x*0.5f - floorf(x*0.5f)); /* x mod 2.0 */
if(ix < 0x3e800000) n = (int)(x*4);
return __sindf(pi*x); n = (n+1)/2;
y = x - n*0.5f;
y = -x; /* negative x is assumed */ y *= 3.14159265358979323846;
/*
* argument reduction, make sure inexact flag not raised if input
* is an integer
*/
z = floorf(y);
if (z != y) { /* inexact anyway */
y *= 0.5f;
y = 2.0f*(y - floorf(y)); /* y = |x| mod 2.0 */
n = (int)(y*4.0f);
} else {
if (ix >= 0x4b800000) {
y = 0.0f; /* y must be even */
n = 0;
} else {
if (ix < 0x4b000000)
z = y + two23; /* exact */
GET_FLOAT_WORD(n, z);
n &= 1;
y = n;
n <<= 2;
}
}
switch (n) { switch (n) {
case 0: y = __sindf(pi*y); break; default: /* case 4: */
case 1: case 0: return __sindf(y);
case 2: y = __cosdf(pi*(0.5f - y)); break; case 1: return __cosdf(y);
case 3: case 2: return __sindf(-y);
case 4: y = __sindf(pi*(1.0f - y)); break; case 3: return -__cosdf(y);
case 5:
case 6: y = -__cosdf(pi*(y - 1.5f)); break;
default: y = __sindf(pi*(y - 2.0f)); break;
} }
return -y;
} }
float __lgammaf_r(float x, int *signgamp) float __lgammaf_r(float x, int *signgamp)
{ {
union {float f; uint32_t i;} u = {x};
float t,y,z,nadj,p,p1,p2,p3,q,r,w; float t,y,z,nadj,p,p1,p2,p3,q,r,w;
int32_t hx; uint32_t ix;
int i,ix; int i,sign;
GET_FLOAT_WORD(hx, x);
/* purge off +-inf, NaN, +-0, tiny and negative arguments */ /* purge off +-inf, NaN, +-0, tiny and negative arguments */
*signgamp = 1; *signgamp = 1;
ix = hx & 0x7fffffff; sign = u.i>>31;
ix = u.i & 0x7fffffff;
if (ix >= 0x7f800000) if (ix >= 0x7f800000)
return x*x; return x*x;
if (ix == 0)
return 1.0f/0.0f;
if (ix < 0x35000000) { /* |x| < 2**-21, return -log(|x|) */ if (ix < 0x35000000) { /* |x| < 2**-21, return -log(|x|) */
if (hx < 0) { if (sign) {
*signgamp = -1; *signgamp = -1;
return -logf(-x); x = -x;
} }
return -logf(x); return -logf(x);
} }
if (hx < 0) { if (sign) {
if (ix >= 0x4b000000) /* |x| >= 2**23, must be -integer */
return 1.0f/0.0f;
t = sin_pif(x);
if (t == 0.0f) /* -integer */
return 1.0f/0.0f;
nadj = logf(pi/fabsf(t*x));
if (t < 0.0f)
*signgamp = -1;
x = -x; x = -x;
t = sin_pi(x);
if (t == 0.0f) /* -integer */
return 1.0f/(x-x);
if (t > 0.0f)
*signgamp = -1;
else
t = -t;
nadj = logf(pi/(t*x));
} }
/* purge off 1 and 2 */ /* purge off 1 and 2 */
@ -241,7 +211,7 @@ float __lgammaf_r(float x, int *signgamp)
r = (x-0.5f)*(t-1.0f)+w; r = (x-0.5f)*(t-1.0f)+w;
} else /* 2**58 <= x <= inf */ } else /* 2**58 <= x <= inf */
r = x*(logf(x)-1.0f); r = x*(logf(x)-1.0f);
if (hx < 0) if (sign)
r = nadj - r; r = nadj - r;
return r; return r;
} }

View File

@ -99,7 +99,6 @@ long double __lgammal_r(long double x, int *sg)
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
static const long double static const long double
pi = 3.14159265358979323846264L, pi = 3.14159265358979323846264L,
two63 = 9.223372036854775808e18L,
/* lgam(1+x) = 0.5 x + x a(x)/b(x) /* lgam(1+x) = 0.5 x + x a(x)/b(x)
-0.268402099609375 <= x <= 0 -0.268402099609375 <= x <= 0
@ -201,61 +200,27 @@ w5 = 8.412723297322498080632E-4L,
w6 = -1.880801938119376907179E-3L, w6 = -1.880801938119376907179E-3L,
w7 = 4.885026142432270781165E-3L; w7 = 4.885026142432270781165E-3L;
/* sin(pi*x) assuming x > 2^-1000, if sin(pi*x)==0 the sign is arbitrary */
static long double sin_pi(long double x) static long double sin_pi(long double x)
{ {
union ldshape u = {x};
uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48;
long double y, z;
int n; int n;
if (ix < 0x3ffd8000) /* 0.25 */ /* spurious inexact if odd int */
return sinl(pi * x); x *= 0.5;
y = -x; /* x is assume negative */ x = 2.0*(x - floorl(x)); /* x mod 2.0 */
/* n = (int)(x*4.0);
* argument reduction, make sure inexact flag not raised if input n = (n+1)/2;
* is an integer x -= n*0.5f;
*/ x *= pi;
z = floorl(y);
if (z != y) { /* inexact anyway */
y *= 0.5;
y = 2.0*(y - floorl(y));/* y = |x| mod 2.0 */
n = (int) (y*4.0);
} else {
if (ix >= 0x403f8000) { /* 2^64 */
y = 0.0; /* y must be even */
n = 0;
} else {
if (ix < 0x403e8000) /* 2^63 */
z = y + two63; /* exact */
u.f = z;
n = u.i.m & 1;
y = n;
n <<= 2;
}
}
switch (n) { switch (n) {
case 0: default: /* case 4: */
y = sinl(pi * y); case 0: return __sinl(x, 0.0, 0);
break; case 1: return __cosl(x, 0.0);
case 1: case 2: return __sinl(-x, 0.0, 0);
case 2: case 3: return -__cosl(x, 0.0);
y = cosl(pi * (0.5 - y));
break;
case 3:
case 4:
y = sinl(pi * (1.0 - y));
break;
case 5:
case 6:
y = -cosl(pi * (y - 1.5));
break;
default:
y = sinl(pi * (y - 2.0));
break;
} }
return -y;
} }
long double __lgammal_r(long double x, int *sg) { long double __lgammal_r(long double x, int *sg) {
@ -267,31 +232,32 @@ long double __lgammal_r(long double x, int *sg) {
*sg = 1; *sg = 1;
/* purge off +-inf, NaN, +-0, and negative arguments */ /* purge off +-inf, NaN, +-0, tiny and negative arguments */
if (ix >= 0x7fff0000) if (ix >= 0x7fff0000)
return x * x; return x * x;
if (x == 0) {
*sg -= 2*sign;
return 1.0 / fabsl(x);
}
if (ix < 0x3fc08000) { /* |x|<2**-63, return -log(|x|) */ if (ix < 0x3fc08000) { /* |x|<2**-63, return -log(|x|) */
if (sign) { if (sign) {
*sg = -1; *sg = -1;
return -logl(-x); x = -x;
} }
return -logl(x); return -logl(x);
} }
if (sign) { if (sign) {
t = sin_pi (x);
if (t == 0.0)
return 1.0 / fabsl(t); /* -integer */
nadj = logl(pi / fabsl(t * x));
if (t < 0.0)
*sg = -1;
x = -x; x = -x;
t = sin_pi(x);
if (t == 0.0)
return 1.0 / (x-x); /* -integer */
if (t > 0.0)
*sg = -1;
else
t = -t;
nadj = logl(pi / (t * x));
} }
if (ix < 0x40008000) { /* x < 2.0 */ /* purge off 1 and 2 (so the sign is ok with downward rounding) */
if ((ix == 0x3fff8000 || ix == 0x40008000) && u.i.m == 0) {
r = 0;
} else if (ix < 0x40008000) { /* x < 2.0 */
if (ix <= 0x3ffee666) { /* 8.99993896484375e-1 */ if (ix <= 0x3ffee666) { /* 8.99993896484375e-1 */
/* lgamma(x) = lgamma(x+1) - log(x) */ /* lgamma(x) = lgamma(x+1) - log(x) */
r = -logl(x); r = -logl(x);
@ -376,6 +342,7 @@ long double __lgammal_r(long double x, int *sg) {
} }
#endif #endif
#if (LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) || (LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384)
extern int __signgam; extern int __signgam;
long double lgammal(long double x) long double lgammal(long double x)
@ -384,3 +351,4 @@ long double lgammal(long double x)
} }
weak_alias(__lgammal_r, lgammal_r); weak_alias(__lgammal_r, lgammal_r);
#endif

View File

@ -26,7 +26,7 @@ most ideas and constants are from boost and python
static const double pi = 3.141592653589793238462643383279502884; static const double pi = 3.141592653589793238462643383279502884;
/* sin(pi x) with x > 0 && isnormal(x) assumption */ /* sin(pi x) with x > 0x1p-100, if sin(pi*x)==0 the sign is arbitrary */
static double sinpi(double x) static double sinpi(double x)
{ {
int n; int n;
@ -49,8 +49,7 @@ static double sinpi(double x)
case 1: case 1:
return __cos(x, 0); return __cos(x, 0);
case 2: case 2:
/* sin(0-x) and -sin(x) have different sign at 0 */ return __sin(-x, 0, 0);
return __sin(0-x, 0, 0);
case 3: case 3:
return -__cos(x, 0); return -__cos(x, 0);
} }
@ -108,35 +107,33 @@ static double S(double x)
double tgamma(double x) double tgamma(double x)
{ {
double absx, y, dy, z, r; union {double f; uint64_t i;} u = {x};
double absx, y;
double_t dy, z, r;
uint32_t ix = u.i>>32 & 0x7fffffff;
int sign = u.i>>63;
/* special cases */ /* special cases */
if (!isfinite(x)) if (ix >= 0x7ff00000)
/* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */ /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
return x + INFINITY; return x + INFINITY;
if (ix < (0x3ff-54)<<20)
/* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */
return 1/x;
/* integer arguments */ /* integer arguments */
/* raise inexact when non-integer */ /* raise inexact when non-integer */
if (x == floor(x)) { if (x == floor(x)) {
if (x == 0) if (sign)
/* tgamma(+-0)=+-inf with divide-by-zero */
return 1/x;
if (x < 0)
return 0/0.0; return 0/0.0;
if (x <= sizeof fact/sizeof *fact) if (x <= sizeof fact/sizeof *fact)
return fact[(int)x - 1]; return fact[(int)x - 1];
} }
absx = fabs(x);
/* x ~ 0: tgamma(x) ~ 1/x */
if (absx < 0x1p-54)
return 1/x;
/* x >= 172: tgamma(x)=inf with overflow */ /* x >= 172: tgamma(x)=inf with overflow */
/* x =< -184: tgamma(x)=+-0 with underflow */ /* x =< -184: tgamma(x)=+-0 with underflow */
if (absx >= 184) { if (ix >= 0x40670000) { /* |x| >= 184 */
if (x < 0) { if (sign) {
FORCE_EVAL((float)(0x1p-126/x)); FORCE_EVAL((float)(0x1p-126/x));
if (floor(x) * 0.5 == floor(x * 0.5)) if (floor(x) * 0.5 == floor(x * 0.5))
return 0; return 0;
@ -146,6 +143,8 @@ double tgamma(double x)
return x; return x;
} }
absx = sign ? -x : x;
/* handle the error of x + g - 0.5 */ /* handle the error of x + g - 0.5 */
y = absx + gmhalf; y = absx + gmhalf;
if (absx > gmhalf) { if (absx > gmhalf) {
@ -160,20 +159,21 @@ double tgamma(double x)
r = S(absx) * exp(-y); r = S(absx) * exp(-y);
if (x < 0) { if (x < 0) {
/* reflection formula for negative x */ /* reflection formula for negative x */
/* sinpi(absx) is not 0, integers are already handled */
r = -pi / (sinpi(absx) * absx * r); r = -pi / (sinpi(absx) * absx * r);
dy = -dy; dy = -dy;
z = -z; z = -z;
} }
r += dy * (gmhalf+0.5) * r / y; r += dy * (gmhalf+0.5) * r / y;
z = pow(y, 0.5*z); z = pow(y, 0.5*z);
r = r * z * z; y = r * z * z;
return r; return y;
} }
#if 0 #if 0
double __lgamma_r(double x, int *sign) double __lgamma_r(double x, int *sign)
{ {
double r, absx, z, zz, w; double r, absx;
*sign = 1; *sign = 1;