From 0f53c1a4266ad4cca28115e2c3bcfdc86337d8ca Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Wed, 12 Dec 2012 01:43:43 +0100
Subject: [PATCH] math: add a non-dummy tgamma implementation

uses the lanczos approximation method with the usual tweaks.
same parameters were selected as in boost and python.
(avoides some extra work and special casing found in boost
so the precision is not that good: measured error is <5ulp for
positive x and <10ulp for negative)

an alternative lgamma_r implementation is also given in the same
file which is simpler and smaller than the current one, but less
precise so it's ifdefed out for now.
---
 src/math/tgamma.c  | 223 +++++++++++++++++++++++++++++++++++++++++++--
 src/math/tgammaf.c |  12 +--
 2 files changed, 215 insertions(+), 20 deletions(-)

diff --git a/src/math/tgamma.c b/src/math/tgamma.c
index f3bbe370..a3f203c1 100644
--- a/src/math/tgamma.c
+++ b/src/math/tgamma.c
@@ -1,16 +1,221 @@
-#include <math.h>
+/*
+"A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964)
+"Lanczos Implementation of the Gamma Function" - Paul Godfrey (2001)
+"An Analysis of the Lanczos Gamma Approximation" - Glendon Ralph Pugh (2004)
 
-// FIXME: use lanczos approximation
+approximation method:
 
-double __lgamma_r(double, int *);
+                        (x - 0.5)         S(x)
+Gamma(x) = (x + g - 0.5)         *  ----------------
+                                    exp(x + g - 0.5)
+
+with
+                 a1      a2      a3            aN
+S(x) ~= [ a0 + ----- + ----- + ----- + ... + ----- ]
+               x + 1   x + 2   x + 3         x + N
+
+with a0, a1, a2, a3,.. aN constants which depend on g.
+
+for x < 0 the following reflection formula is used:
+
+Gamma(x)*Gamma(-x) = -pi/(x sin(pi x))
+
+most ideas and constants are from boost and python
+*/
+#include "libm.h"
+
+static const double pi = 3.141592653589793238462643383279502884;
+
+/* sin(pi x) with x > 0 && isnormal(x) assumption */
+static double sinpi(double x)
+{
+	int n;
+
+	/* argument reduction: x = |x| mod 2 */
+	/* spurious inexact when x is odd int */
+	x = x * 0.5;
+	x = 2 * (x - floor(x));
+
+	/* reduce x into [-.25,.25] */
+	n = 4 * x;
+	n = (n+1)/2;
+	x -= n * 0.5;
+
+	x *= pi;
+	switch (n) {
+	default: /* case 4 */
+	case 0:
+		return __sin(x, 0, 0);
+	case 1:
+		return __cos(x, 0);
+	case 2:
+		/* sin(0-x) and -sin(x) have different sign at 0 */
+		return __sin(0-x, 0, 0);
+	case 3:
+		return -__cos(x, 0);
+	}
+}
+
+#define N 12
+//static const double g = 6.024680040776729583740234375;
+static const double gmhalf = 5.524680040776729583740234375;
+static const double Snum[N+1] = {
+	23531376880.410759688572007674451636754734846804940,
+	42919803642.649098768957899047001988850926355848959,
+	35711959237.355668049440185451547166705960488635843,
+	17921034426.037209699919755754458931112671403265390,
+	6039542586.3520280050642916443072979210699388420708,
+	1439720407.3117216736632230727949123939715485786772,
+	248874557.86205415651146038641322942321632125127801,
+	31426415.585400194380614231628318205362874684987640,
+	2876370.6289353724412254090516208496135991145378768,
+	186056.26539522349504029498971604569928220784236328,
+	8071.6720023658162106380029022722506138218516325024,
+	210.82427775157934587250973392071336271166969580291,
+	2.5066282746310002701649081771338373386264310793408,
+};
+static const double Sden[N+1] = {
+	0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535,
+	2637558, 357423, 32670, 1925, 66, 1,
+};
+/* n! for small integer n */
+static const double fact[] = {
+	1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0,
+	479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0,
+	355687428096000.0, 6402373705728000.0, 121645100408832000.0,
+	2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0,
+};
+
+/* S(x) rational function for positive x */
+static double S(double x)
+{
+	double num = 0, den = 0;
+	int i;
+
+	/* to avoid overflow handle large x differently */
+	if (x < 8)
+		for (i = N; i >= 0; i--) {
+			num = num * x + Snum[i];
+			den = den * x + Sden[i];
+		}
+	else
+		for (i = 0; i <= N; i++) {
+			num = num / x + Snum[i];
+			den = den / x + Sden[i];
+		}
+	return num/den;
+}
 
 double tgamma(double x)
 {
-	int sign;
-	double y;
+	double absx, y, dy, z, r;
 
-	y = exp(__lgamma_r(x, &sign));
-	if (sign < 0)
-		y = -y;
-	return y;
+	/* special cases */
+	if (!isfinite(x))
+		/* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
+		return x + INFINITY;
+
+	/* integer arguments */
+	/* raise inexact when non-integer */
+	if (x == floor(x)) {
+		if (x == 0)
+			/* tgamma(+-0)=+-inf with divide-by-zero */
+			return 1/x;
+		if (x < 0)
+			return 0/0.0;
+		if (x <= sizeof fact/sizeof *fact)
+			return fact[(int)x - 1];
+	}
+
+	absx = fabs(x);
+
+	/* x ~ 0: tgamma(x) ~ 1/x */
+	if (absx < 0x1p-54)
+		return 1/x;
+
+	/* x >= 172: tgamma(x)=inf with overflow */
+	/* x =< -184: tgamma(x)=+-0 with underflow */
+	if (absx >= 184) {
+		if (x < 0) {
+			if (floor(x) * 0.5 == floor(x * 0.5))
+				return 0;
+			return -0.0;
+		}
+		x *= 0x1p1023;
+		return x;
+	}
+
+	/* handle the error of x + g - 0.5 */
+	y = absx + gmhalf;
+	if (absx > gmhalf) {
+		dy = y - absx;
+		dy -= gmhalf;
+	} else {
+		dy = y - gmhalf;
+		dy -= absx;
+	}
+
+	z = absx - 0.5;
+	r = S(absx) * exp(-y);
+	if (x < 0) {
+		/* reflection formula for negative x */
+		r = -pi / (sinpi(absx) * absx * r);
+		dy = -dy;
+		z = -z;
+	}
+	r += dy * (gmhalf+0.5) * r / y;
+	z = pow(y, 0.5*z);
+	r = r * z * z;
+	return r;
 }
+
+#if 0
+double __lgamma_r(double x, int *sign)
+{
+	double r, absx, z, zz, w;
+
+	*sign = 1;
+
+	/* special cases */
+	if (!isfinite(x))
+		/* lgamma(nan)=nan, lgamma(+-inf)=inf */
+		return x*x;
+
+	/* integer arguments */
+	if (x == floor(x) && x <= 2) {
+		/* n <= 0: lgamma(n)=inf with divbyzero */
+		/* n == 1,2: lgamma(n)=0 */
+		if (x <= 0)
+			return 1/0.0;
+		return 0;
+	}
+
+	absx = fabs(x);
+
+	/* lgamma(x) ~ -log(|x|) for tiny |x| */
+	if (absx < 0x1p-54) {
+		*sign = 1 - 2*!!signbit(x);
+		return -log(absx);
+	}
+
+	/* use tgamma for smaller |x| */
+	if (absx < 128) {
+		x = tgamma(x);
+		*sign = 1 - 2*!!signbit(x);
+		return log(fabs(x));
+	}
+
+	/* second term (log(S)-g) could be more precise here.. */
+	/* or with stirling: (|x|-0.5)*(log(|x|)-1) + poly(1/|x|) */
+	r = (absx-0.5)*(log(absx+gmhalf)-1) + (log(S(absx)) - (gmhalf+0.5));
+	if (x < 0) {
+		/* reflection formula for negative x */
+		x = sinpi(absx);
+		*sign = 2*!!signbit(x) - 1;
+		r = log(pi/(fabs(x)*absx)) - r;
+	}
+	return r;
+}
+
+weak_alias(__lgamma_r, lgamma_r);
+#endif
diff --git a/src/math/tgammaf.c b/src/math/tgammaf.c
index 16df8076..b4ca51c9 100644
--- a/src/math/tgammaf.c
+++ b/src/math/tgammaf.c
@@ -1,16 +1,6 @@
 #include <math.h>
 
-// FIXME: use lanczos approximation
-
-float __lgammaf_r(float, int *);
-
 float tgammaf(float x)
 {
-	int sign;
-	float y;
-
-	y = exp(__lgammaf_r(x, &sign));
-	if (sign < 0)
-		y = -y;
-	return y;
+	return tgamma(x);
 }