math: clean up __rem_pio2

- remove the HAVE_EFFICIENT_IRINT case: fn is an exact integer, so it can be converted to int32_t a bit more efficiently than with a cast (the rounding mode change can be avoided), but musl does not support this case on any arch.
- __rem_pio2: use double_t where possible
- __rem_pio2f: use fewer assignments to avoid stores on i386
- use unsigned int bit manipulation (and union instead of macros)
- use hexfloat literals instead of named constants
2025-02-18 20:06:50 +00:00 · 2013-11-24 01:06:38 +00:00 · 2013-11-24 01:06:38 +00:00 · 3fdf94ec51
commit 3fdf94ec51
parent 10c8b7148b
3 changed files with 53 additions and 71 deletions
--- a/src/math/__rem_pio2.c
+++ b/src/math/__rem_pio2.c
@ -29,7 +29,6 @@
 * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
 */
 static const double
-two24   = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
 pio2_1  = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
@ -41,18 +40,19 @@ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
 /* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
 int __rem_pio2(double x, double *y)
 {
-	double z,w,t,r,fn;
-	double tx[3],ty[2];
-	int32_t e0,i,j,nx,n,ix,hx;
-	uint32_t low;
+	union {double f; uint64_t i;} u = {x};
+	double_t z,w,t,r;
+	double tx[3],ty[2],fn;
+	uint32_t ix;
+	int sign, n, ex, ey, i;

-	GET_HIGH_WORD(hx,x);
-	ix = hx & 0x7fffffff;
+	sign = u.i>>63;
+	ix = u.i>>32 & 0x7fffffff;
 	if (ix <= 0x400f6a7a) {  /* |x| ~<= 5pi/4 */
 		if ((ix & 0xfffff) == 0x921fb)  /* |x| ~= pi/2 or 2pi/2 */
 			goto medium;  /* cancellation -- use medium case */
 		if (ix <= 0x4002d97c) {  /* |x| ~<= 3pi/4 */
-			if (hx > 0) {
+			if (!sign) {
 				z = x - pio2_1;  /* one round good to 85 bits */
 				y[0] = z - pio2_1t;
 				y[1] = (z-y[0]) - pio2_1t;
@ -64,7 +64,7 @@ int __rem_pio2(double x, double *y)
 				return -1;
 			}
 		} else {
-			if (hx > 0) {
+			if (!sign) {
 				z = x - 2*pio2_1;
 				y[0] = z - 2*pio2_1t;
 				y[1] = (z-y[0]) - 2*pio2_1t;
@ -81,7 +81,7 @@ int __rem_pio2(double x, double *y)
 		if (ix <= 0x4015fdbc) {  /* |x| ~<= 7pi/4 */
 			if (ix == 0x4012d97c)  /* |x| ~= 3pi/2 */
 				goto medium;
-			if (hx > 0) {
+			if (!sign) {
 				z = x - 3*pio2_1;
 				y[0] = z - 3*pio2_1t;
 				y[1] = (z-y[0]) - 3*pio2_1t;
@ -95,7 +95,7 @@ int __rem_pio2(double x, double *y)
 		} else {
 			if (ix == 0x401921fb)  /* |x| ~= 4pi/2 */
 				goto medium;
-			if (hx > 0) {
+			if (!sign) {
 				z = x - 4*pio2_1;
 				y[0] = z - 4*pio2_1t;
 				y[1] = (z-y[0]) - 4*pio2_1t;
@ -109,32 +109,26 @@ int __rem_pio2(double x, double *y)
 		}
 	}
 	if (ix < 0x413921fb) {  /* |x| ~< 2^20*(pi/2), medium size */
-		uint32_t high;
 medium:
-		/* Use a specialized rint() to get fn.  Assume round-to-nearest. */
+		/* rint(x/(pi/2)), Assume round-to-nearest. */
 		fn = x*invpio2 + 0x1.8p52;
 		fn = fn - 0x1.8p52;
-// FIXME
-#ifdef HAVE_EFFICIENT_IRINT
-		n = irint(fn);
-#else
 		n = (int32_t)fn;
-#endif
 		r = x - fn*pio2_1;
 		w = fn*pio2_1t;  /* 1st round, good to 85 bits */
-		j = ix>>20;
 		y[0] = r - w;
-		GET_HIGH_WORD(high,y[0]);
-		i = j - ((high>>20)&0x7ff);
-		if (i > 16) {  /* 2nd round, good to 118 bits */
+		u.f = y[0];
+		ey = u.i>>52 & 0x7ff;
+		ex = ix>>20;
+		if (ex - ey > 16) { /* 2nd round, good to 118 bits */
 			t = r;
 			w = fn*pio2_2;
 			r = t - w;
 			w = fn*pio2_2t - ((t-r)-w);
 			y[0] = r - w;
-			GET_HIGH_WORD(high,y[0]);
-			i = j - ((high>>20)&0x7ff);
-			if (i > 49) {  /* 3rd round, good to 151 bits, covers all cases */
+			u.f = y[0];
+			ey = u.i>>52 & 0x7ff;
+			if (ex - ey > 49) {  /* 3rd round, good to 151 bits, covers all cases */
 				t = r;
 				w = fn*pio2_3;
 				r = t - w;
@ -142,7 +136,7 @@ medium:
 				y[0] = r - w;
 			}
 		}
-		y[1] = (r-y[0]) - w;
+		y[1] = (r - y[0]) - w;
 		return n;
 	}
 	/*
@ -152,19 +146,21 @@ medium:
 		y[0] = y[1] = x - x;
 		return 0;
 	}
-	/* set z = scalbn(|x|,ilogb(x)-23) */
-	GET_LOW_WORD(low,x);
-	e0 = (ix>>20) - 1046;  /* e0 = ilogb(z)-23; */
-	INSERT_WORDS(z, ix - ((int32_t)(e0<<20)), low);
-	for (i=0; i<2; i++) {
-		tx[i] = (double)((int32_t)(z));
-		z = (z-tx[i])*two24;
+	/* set z = scalbn(|x|,-ilogb(x)+23) */
+	u.f = x;
+	u.i &= (uint64_t)-1>>12;
+	u.i |= (uint64_t)(0x3ff + 23)<<52;
+	z = u.f;
+	for (i=0; i < 2; i++) {
+		tx[i] = (double)(int32_t)z;
+		z     = (z-tx[i])*0x1p24;
 	}
-	tx[2] = z;
-	nx = 3;
-	while (tx[nx-1] == 0.0) nx--;  /* skip zero term */
-	n = __rem_pio2_large(tx,ty,e0,nx,1);
-	if (hx < 0) {
+	tx[i] = z;
+	/* skip zero terms, first term is non-zero */
+	while (tx[i] == 0.0)
+		i--;
+	n = __rem_pio2_large(tx,ty,(int)(ix>>20)-(0x3ff+23),i+1,1);
+	if (sign) {
 		y[0] = -ty[0];
 		y[1] = -ty[1];
 		return -n;
--- a/src/math/__rem_pio2_large.c
+++ b/src/math/__rem_pio2_large.c
@ -270,10 +270,6 @@ static const double PIo2[] = {
  2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
 };

-static const double
-two24  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
-twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
-
 int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
 {
 	int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
@ -304,8 +300,8 @@ int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
 recompute:
 	/* distill q[] into iq[] reversingly */
 	for (i=0,j=jz,z=q[jz]; j>0; i++,j--) {
-		fw    = (double)((int32_t)(twon24* z));
-		iq[i] = (int32_t)(z-two24*fw);
+		fw    = (double)(int32_t)(0x1p-24*z);
+		iq[i] = (int32_t)(z - 0x1p24*fw);
 		z     = q[j-1]+fw;
 	}

@ -330,7 +326,7 @@ recompute:
 			if (carry == 0) {
 				if (j != 0) {
 					carry = 1;
-					iq[i] = 0x1000000- j;
+					iq[i] = 0x1000000 - j;
 				}
 			} else
 				iq[i] = 0xffffff - j;
@ -378,9 +374,9 @@ recompute:
 		}
 	} else { /* break z into 24-bit if necessary */
 		z = scalbn(z,-q0);
-		if (z >= two24) {
-			fw = (double)((int32_t)(twon24*z));
-			iq[jz] = (int32_t)(z-two24*fw);
+		if (z >= 0x1p24) {
+			fw = (double)(int32_t)(0x1p-24*z);
+			iq[jz] = (int32_t)(z - 0x1p24*fw);
 			jz += 1;
 			q0 += 24;
 			iq[jz] = (int32_t)fw;
@ -392,7 +388,7 @@ recompute:
 	fw = scalbn(1.0,q0);
 	for (i=jz; i>=0; i--) {
 		q[i] = fw*(double)iq[i];
-		fw *= twon24;
+		fw *= 0x1p-24;
 	}

 	/* compute PIo2[0,...,jp]*q[jz,...,0] */
--- a/src/math/__rem_pio2f.c
+++ b/src/math/__rem_pio2f.c
@ -34,42 +34,32 @@ pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */

 int __rem_pio2f(float x, double *y)
 {
-	double w,r,fn;
-	double tx[1],ty[1];
-	float z;
-	int32_t e0,n,ix,hx;
+	union {float f; uint32_t i;} u = {x};
+	double tx[1],ty[1],fn;
+	uint32_t ix;
+	int n, sign, e0;

-	GET_FLOAT_WORD(hx, x);
-	ix = hx & 0x7fffffff;
+	ix = u.i & 0x7fffffff;
 	/* 25+53 bit pi is good enough for medium size */
 	if (ix < 0x4dc90fdb) {  /* |x| ~< 2^28*(pi/2), medium size */
 		/* Use a specialized rint() to get fn.  Assume round-to-nearest. */
 		fn = x*invpio2 + 0x1.8p52;
 		fn = fn - 0x1.8p52;
-// FIXME
-#ifdef HAVE_EFFICIENT_IRINT
-		n  = irint(fn);
-#else
 		n  = (int32_t)fn;
-#endif
-		r  = x - fn*pio2_1;
-		w  = fn*pio2_1t;
-		*y = r - w;
+		*y = x - fn*pio2_1 - fn*pio2_1t;
 		return n;
 	}
-	/*
-	 * all other (large) arguments
-	 */
 	if(ix>=0x7f800000) {  /* x is inf or NaN */
 		*y = x-x;
 		return 0;
 	}
-	/* set z = scalbn(|x|,ilogb(|x|)-23) */
-	e0 = (ix>>23) - 150;  /* e0 = ilogb(|x|)-23; */
-	SET_FLOAT_WORD(z, ix - ((int32_t)(e0<<23)));
-	tx[0] = z;
+	/* scale x into [2^23, 2^24-1] */
+	sign = u.i>>31;
+	e0 = (ix>>23) - (0x7f+23);  /* e0 = ilogb(|x|)-23, positive */
+	u.i = ix - (e0<<23);
+	tx[0] = u.f;
 	n  =  __rem_pio2_large(tx,ty,e0,1,0);
-	if (hx < 0) {
+	if (sign) {
 		*y = -ty[0];
 		return -n;
 	}