math: add a non-dummy tgamma implementation

uses the lanczos approximation method with the usual tweaks. same parameters were selected as in boost and python. (avoides some extra work and special casing found in boost so the precision is not that good: measured error is <5ulp for positive x and <10ulp for negative) an alternative lgamma_r implementation is also given in the same file which is simpler and smaller than the current one, but less precise so it's ifdefed out for now.
2025-02-18 20:06:50 +00:00 · 2012-12-12 01:43:43 +01:00 · 2012-12-12 01:43:43 +01:00 · 0f53c1a426
commit 0f53c1a426
parent 14cc9c7f38
2 changed files with 215 additions and 20 deletions
--- a/src/math/tgamma.c
+++ b/src/math/tgamma.c
@ -1,16 +1,221 @@
-#include <math.h>
+/*
+"A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964)
+"Lanczos Implementation of the Gamma Function" - Paul Godfrey (2001)
+"An Analysis of the Lanczos Gamma Approximation" - Glendon Ralph Pugh (2004)

-// FIXME: use lanczos approximation
+approximation method:

-double __lgamma_r(double, int *);
+                        (x - 0.5)         S(x)
+Gamma(x) = (x + g - 0.5)         *  ----------------
+                                    exp(x + g - 0.5)
+
+with
+                 a1      a2      a3            aN
+S(x) ~= [ a0 + ----- + ----- + ----- + ... + ----- ]
+               x + 1   x + 2   x + 3         x + N
+
+with a0, a1, a2, a3,.. aN constants which depend on g.
+
+for x < 0 the following reflection formula is used:
+
+Gamma(x)*Gamma(-x) = -pi/(x sin(pi x))
+
+most ideas and constants are from boost and python
+*/
+#include "libm.h"
+
+static const double pi = 3.141592653589793238462643383279502884;
+
+/* sin(pi x) with x > 0 && isnormal(x) assumption */
+static double sinpi(double x)
+{
+	int n;
+
+	/* argument reduction: x = |x| mod 2 */
+	/* spurious inexact when x is odd int */
+	x = x * 0.5;
+	x = 2 * (x - floor(x));
+
+	/* reduce x into [-.25,.25] */
+	n = 4 * x;
+	n = (n+1)/2;
+	x -= n * 0.5;
+
+	x *= pi;
+	switch (n) {
+	default: /* case 4 */
+	case 0:
+		return __sin(x, 0, 0);
+	case 1:
+		return __cos(x, 0);
+	case 2:
+		/* sin(0-x) and -sin(x) have different sign at 0 */
+		return __sin(0-x, 0, 0);
+	case 3:
+		return -__cos(x, 0);
+	}
+}
+
+#define N 12
+//static const double g = 6.024680040776729583740234375;
+static const double gmhalf = 5.524680040776729583740234375;
+static const double Snum[N+1] = {
+	23531376880.410759688572007674451636754734846804940,
+	42919803642.649098768957899047001988850926355848959,
+	35711959237.355668049440185451547166705960488635843,
+	17921034426.037209699919755754458931112671403265390,
+	6039542586.3520280050642916443072979210699388420708,
+	1439720407.3117216736632230727949123939715485786772,
+	248874557.86205415651146038641322942321632125127801,
+	31426415.585400194380614231628318205362874684987640,
+	2876370.6289353724412254090516208496135991145378768,
+	186056.26539522349504029498971604569928220784236328,
+	8071.6720023658162106380029022722506138218516325024,
+	210.82427775157934587250973392071336271166969580291,
+	2.5066282746310002701649081771338373386264310793408,
+};
+static const double Sden[N+1] = {
+	0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535,
+	2637558, 357423, 32670, 1925, 66, 1,
+};
+/* n! for small integer n */
+static const double fact[] = {
+	1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0,
+	479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0,
+	355687428096000.0, 6402373705728000.0, 121645100408832000.0,
+	2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0,
+};
+
+/* S(x) rational function for positive x */
+static double S(double x)
+{
+	double num = 0, den = 0;
+	int i;
+
+	/* to avoid overflow handle large x differently */
+	if (x < 8)
+		for (i = N; i >= 0; i--) {
+			num = num * x + Snum[i];
+			den = den * x + Sden[i];
+		}
+	else
+		for (i = 0; i <= N; i++) {
+			num = num / x + Snum[i];
+			den = den / x + Sden[i];
+		}
+	return num/den;
+}

 double tgamma(double x)
 {
-	int sign;
-	double y;
+	double absx, y, dy, z, r;

-	y = exp(__lgamma_r(x, &sign));
-	if (sign < 0)
-		y = -y;
-	return y;
+	/* special cases */
+	if (!isfinite(x))
+		/* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
+		return x + INFINITY;
+
+	/* integer arguments */
+	/* raise inexact when non-integer */
+	if (x == floor(x)) {
+		if (x == 0)
+			/* tgamma(+-0)=+-inf with divide-by-zero */
+			return 1/x;
+		if (x < 0)
+			return 0/0.0;
+		if (x <= sizeof fact/sizeof *fact)
+			return fact[(int)x - 1];
+	}
+
+	absx = fabs(x);
+
+	/* x ~ 0: tgamma(x) ~ 1/x */
+	if (absx < 0x1p-54)
+		return 1/x;
+
+	/* x >= 172: tgamma(x)=inf with overflow */
+	/* x =< -184: tgamma(x)=+-0 with underflow */
+	if (absx >= 184) {
+		if (x < 0) {
+			if (floor(x) * 0.5 == floor(x * 0.5))
+				return 0;
+			return -0.0;
+		}
+		x *= 0x1p1023;
+		return x;
+	}
+
+	/* handle the error of x + g - 0.5 */
+	y = absx + gmhalf;
+	if (absx > gmhalf) {
+		dy = y - absx;
+		dy -= gmhalf;
+	} else {
+		dy = y - gmhalf;
+		dy -= absx;
+	}
+
+	z = absx - 0.5;
+	r = S(absx) * exp(-y);
+	if (x < 0) {
+		/* reflection formula for negative x */
+		r = -pi / (sinpi(absx) * absx * r);
+		dy = -dy;
+		z = -z;
+	}
+	r += dy * (gmhalf+0.5) * r / y;
+	z = pow(y, 0.5*z);
+	r = r * z * z;
+	return r;
 }
+
+#if 0
+double __lgamma_r(double x, int *sign)
+{
+	double r, absx, z, zz, w;
+
+	*sign = 1;
+
+	/* special cases */
+	if (!isfinite(x))
+		/* lgamma(nan)=nan, lgamma(+-inf)=inf */
+		return x*x;
+
+	/* integer arguments */
+	if (x == floor(x) && x <= 2) {
+		/* n <= 0: lgamma(n)=inf with divbyzero */
+		/* n == 1,2: lgamma(n)=0 */
+		if (x <= 0)
+			return 1/0.0;
+		return 0;
+	}
+
+	absx = fabs(x);
+
+	/* lgamma(x) ~ -log(|x|) for tiny |x| */
+	if (absx < 0x1p-54) {
+		*sign = 1 - 2*!!signbit(x);
+		return -log(absx);
+	}
+
+	/* use tgamma for smaller |x| */
+	if (absx < 128) {
+		x = tgamma(x);
+		*sign = 1 - 2*!!signbit(x);
+		return log(fabs(x));
+	}
+
+	/* second term (log(S)-g) could be more precise here.. */
+	/* or with stirling: (|x|-0.5)*(log(|x|)-1) + poly(1/|x|) */
+	r = (absx-0.5)*(log(absx+gmhalf)-1) + (log(S(absx)) - (gmhalf+0.5));
+	if (x < 0) {
+		/* reflection formula for negative x */
+		x = sinpi(absx);
+		*sign = 2*!!signbit(x) - 1;
+		r = log(pi/(fabs(x)*absx)) - r;
+	}
+	return r;
+}
+
+weak_alias(__lgamma_r, lgamma_r);
+#endif
--- a/src/math/tgammaf.c
+++ b/src/math/tgammaf.c
@ -1,16 +1,6 @@
 #include <math.h>

-// FIXME: use lanczos approximation
-
-float __lgammaf_r(float, int *);
-
 float tgammaf(float x)
 {
-	int sign;
-	float y;
-
-	y = exp(__lgammaf_r(x, &sign));
-	if (sign < 0)
-		y = -y;
-	return y;
+	return tgamma(x);
 }