math: use the rounding idiom consistently

the idiomatic rounding of x is

  n = x + toint - toint;

where toint is either 1/EPSILON (x is non-negative) or 1.5/EPSILON
(x may be negative and nearest rounding mode is assumed) and EPSILON is
according to the evaluation precision (the type of toint is not very
important, because single precision float can represent the 1/EPSILON of
ieee binary128).

in case of FLT_EVAL_METHOD!=0 this avoids a useless store to double or
float precision, and the long double code became cleaner with
1/LDBL_EPSILON instead of ifdefs for toint.

__rem_pio2f and __rem_pio2 functions slightly changed semantics:
on i386 a double-rounding is avoided so close to half-way cases may
get evaluated differently eg. as sin(pi/4-eps) instead of cos(pi/4+eps)
This commit is contained in:
Szabolcs Nagy 2014-10-29 00:34:37 +01:00 committed by Rich Felker
parent 79ca86094d
commit 0ce946cf80
13 changed files with 89 additions and 58 deletions

View File

@ -19,6 +19,12 @@
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
/*
* invpio2: 53 bits of 2/pi
* pio2_1: first 33 bit of pi/2
@ -29,6 +35,7 @@
* pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
*/
static const double
toint = 1.5/EPS,
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
@ -41,8 +48,8 @@ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
int __rem_pio2(double x, double *y)
{
union {double f; uint64_t i;} u = {x};
double_t z,w,t,r;
double tx[3],ty[2],fn;
double_t z,w,t,r,fn;
double tx[3],ty[2];
uint32_t ix;
int sign, n, ex, ey, i;
@ -111,8 +118,7 @@ int __rem_pio2(double x, double *y)
if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
medium:
/* rint(x/(pi/2)), Assume round-to-nearest. */
fn = x*invpio2 + 0x1.8p52;
fn = fn - 0x1.8p52;
fn = x*invpio2 + toint - toint;
n = (int32_t)fn;
r = x - fn*pio2_1;
w = fn*pio2_1t; /* 1st round, good to 85 bits */

View File

@ -22,12 +22,19 @@
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
/*
* invpio2: 53 bits of 2/pi
* pio2_1: first 25 bits of pi/2
* pio2_1t: pi/2 - pio2_1
*/
static const double
toint = 1.5/EPS,
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
pio2_1 = 1.57079631090164184570e+00, /* 0x3FF921FB, 0x50000000 */
pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */
@ -35,7 +42,8 @@ pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */
int __rem_pio2f(float x, double *y)
{
union {float f; uint32_t i;} u = {x};
double tx[1],ty[1],fn;
double tx[1],ty[1];
double_t fn;
uint32_t ix;
int n, sign, e0;
@ -43,8 +51,7 @@ int __rem_pio2f(float x, double *y)
/* 25+53 bit pi is good enough for medium size */
if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
/* Use a specialized rint() to get fn. Assume round-to-nearest. */
fn = x*invpio2 + 0x1.8p52;
fn = fn - 0x1.8p52;
fn = x*invpio2 + toint - toint;
n = (int32_t)fn;
*y = x - fn*pio2_1 - fn*pio2_1t;
return n;

View File

@ -20,10 +20,11 @@
* use __rem_pio2_large() for large x
*/
static const long double toint = 1.5/LDBL_EPSILON;
#if LDBL_MANT_DIG == 64
/* u ~< 0x1p25*pi/2 */
#define SMALL(u) (((u.i.se & 0x7fffU)<<16 | u.i.m>>48) < ((0x3fff + 25)<<16 | 0x921f>>1 | 0x8000))
#define TOINT 0x1.8p63
#define QUOBITS(x) ((uint32_t)(int32_t)x & 0x7fffffff)
#define ROUND1 22
#define ROUND2 61
@ -50,7 +51,6 @@ pio2_3t = -2.75299651904407171810e-37L; /* -0xbb5bf6c7ddd660ce.0p-185 */
#elif LDBL_MANT_DIG == 113
/* u ~< 0x1p45*pi/2 */
#define SMALL(u) (((u.i.se & 0x7fffU)<<16 | u.i.top) < ((0x3fff + 45)<<16 | 0x921f))
#define TOINT 0x1.8p112
#define QUOBITS(x) ((uint32_t)(int64_t)x & 0x7fffffff)
#define ROUND1 51
#define ROUND2 119
@ -77,7 +77,7 @@ int __rem_pio2l(long double x, long double *y)
ex = u.i.se & 0x7fff;
if (SMALL(u)) {
/* rint(x/(pi/2)), Assume round-to-nearest. */
fn = x*invpio2 + TOINT - TOINT;
fn = x*invpio2 + toint - toint;
n = QUOBITS(fn);
r = x-fn*pio2_1;
w = fn*pio2_1t; /* 1st round good to 102/180 bits (ld80/ld128) */

View File

@ -1,5 +1,12 @@
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
static const double_t toint = 1/EPS;
double ceil(double x)
{
union {double f; uint64_t i;} u = {x};
@ -10,9 +17,9 @@ double ceil(double x)
return x;
/* y = int(x) - x, where int(x) is an integer neighbor of x */
if (u.i >> 63)
y = (double)(x - 0x1p52) + 0x1p52 - x;
y = x - toint + toint - x;
else
y = (double)(x + 0x1p52) - 0x1p52 - x;
y = x + toint - toint - x;
/* special case because of non-nearest rounding modes */
if (e <= 0x3ff-1) {
FORCE_EVAL(y);

View File

@ -6,11 +6,9 @@ long double ceill(long double x)
return ceil(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
#if LDBL_MANT_DIG == 64
#define TOINT 0x1p63
#elif LDBL_MANT_DIG == 113
#define TOINT 0x1p112
#endif
static const long double toint = 1/LDBL_EPSILON;
long double ceill(long double x)
{
union ldshape u = {x};
@ -21,9 +19,9 @@ long double ceill(long double x)
return x;
/* y = int(x) - x, where int(x) is an integer neighbor of x */
if (u.i.se >> 15)
y = x - TOINT + TOINT - x;
y = x - toint + toint - x;
else
y = x + TOINT - TOINT - x;
y = x + toint - toint - x;
/* special case because of non-nearest rounding modes */
if (e <= 0x3fff-1) {
FORCE_EVAL(y);

View File

@ -1,5 +1,12 @@
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
static const double_t toint = 1/EPS;
double floor(double x)
{
union {double f; uint64_t i;} u = {x};
@ -10,9 +17,9 @@ double floor(double x)
return x;
/* y = int(x) - x, where int(x) is an integer neighbor of x */
if (u.i >> 63)
y = (double)(x - 0x1p52) + 0x1p52 - x;
y = x - toint + toint - x;
else
y = (double)(x + 0x1p52) - 0x1p52 - x;
y = x + toint - toint - x;
/* special case because of non-nearest rounding modes */
if (e <= 0x3ff-1) {
FORCE_EVAL(y);

View File

@ -6,11 +6,9 @@ long double floorl(long double x)
return floor(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
#if LDBL_MANT_DIG == 64
#define TOINT 0x1p63
#elif LDBL_MANT_DIG == 113
#define TOINT 0x1p112
#endif
static const long double toint = 1/LDBL_EPSILON;
long double floorl(long double x)
{
union ldshape u = {x};
@ -21,9 +19,9 @@ long double floorl(long double x)
return x;
/* y = int(x) - x, where int(x) is an integer neighbor of x */
if (u.i.se >> 15)
y = x - TOINT + TOINT - x;
y = x - toint + toint - x;
else
y = x + TOINT - TOINT - x;
y = x + toint - toint - x;
/* special case because of non-nearest rounding modes */
if (e <= 0x3fff-1) {
FORCE_EVAL(y);

View File

@ -11,11 +11,9 @@ long double modfl(long double x, long double *iptr)
return r;
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
#if LDBL_MANT_DIG == 64
#define TOINT 0x1p63
#elif LDBL_MANT_DIG == 113
#define TOINT 0x1p112
#endif
static const long double toint = 1/LDBL_EPSILON;
long double modfl(long double x, long double *iptr)
{
union ldshape u = {x};
@ -40,7 +38,7 @@ long double modfl(long double x, long double *iptr)
/* raises spurious inexact */
absx = s ? -x : x;
y = absx + TOINT - TOINT - absx;
y = absx + toint - toint - absx;
if (y == 0) {
*iptr = x;
return s ? -0.0 : 0.0;

View File

@ -6,11 +6,9 @@ long double rintl(long double x)
return rint(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
#if LDBL_MANT_DIG == 64
#define TOINT 0x1p63
#elif LDBL_MANT_DIG == 113
#define TOINT 0x1p112
#endif
static const long double toint = 1/LDBL_EPSILON;
long double rintl(long double x)
{
union ldshape u = {x};
@ -21,9 +19,9 @@ long double rintl(long double x)
if (e >= 0x3fff+LDBL_MANT_DIG-1)
return x;
if (s)
y = x - TOINT + TOINT;
y = x - toint + toint;
else
y = x + TOINT - TOINT;
y = x + toint - toint;
if (y == 0)
return 0*x;
return y;

View File

@ -1,5 +1,12 @@
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
static const double_t toint = 1/EPS;
double round(double x)
{
union {double f; uint64_t i;} u = {x};
@ -12,10 +19,10 @@ double round(double x)
x = -x;
if (e < 0x3ff-1) {
/* raise inexact if x!=0 */
FORCE_EVAL(x + 0x1p52);
FORCE_EVAL(x + toint);
return 0*u.f;
}
y = (double)(x + 0x1p52) - 0x1p52 - x;
y = x + toint - toint - x;
if (y > 0.5)
y = y + x - 1;
else if (y <= -0.5)

View File

@ -1,5 +1,14 @@
#include "libm.h"
#if FLT_EVAL_METHOD==0
#define EPS FLT_EPSILON
#elif FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
static const float_t toint = 1/EPS;
float roundf(float x)
{
union {float f; uint32_t i;} u = {x};
@ -11,10 +20,10 @@ float roundf(float x)
if (u.i >> 31)
x = -x;
if (e < 0x7f-1) {
FORCE_EVAL(x + 0x1p23f);
FORCE_EVAL(x + toint);
return 0*u.f;
}
y = (float)(x + 0x1p23f) - 0x1p23f - x;
y = x + toint - toint - x;
if (y > 0.5f)
y = y + x - 1;
else if (y <= -0.5f)

View File

@ -6,11 +6,9 @@ long double roundl(long double x)
return round(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
#if LDBL_MANT_DIG == 64
#define TOINT 0x1p63
#elif LDBL_MANT_DIG == 113
#define TOINT 0x1p112
#endif
static const long double toint = 1/LDBL_EPSILON;
long double roundl(long double x)
{
union ldshape u = {x};
@ -22,10 +20,10 @@ long double roundl(long double x)
if (u.i.se >> 15)
x = -x;
if (e < 0x3fff-1) {
FORCE_EVAL(x + TOINT);
FORCE_EVAL(x + toint);
return 0*u.f;
}
y = x + TOINT - TOINT - x;
y = x + toint - toint - x;
if (y > 0.5)
y = y + x - 1;
else if (y <= -0.5)

View File

@ -6,11 +6,9 @@ long double truncl(long double x)
return trunc(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
#if LDBL_MANT_DIG == 64
#define TOINT 0x1p63
#elif LDBL_MANT_DIG == 113
#define TOINT 0x1p112
#endif
static const long double toint = 1/LDBL_EPSILON;
long double truncl(long double x)
{
union ldshape u = {x};
@ -27,7 +25,7 @@ long double truncl(long double x)
/* y = int(|x|) - |x|, where int(|x|) is an integer neighbor of |x| */
if (s)
x = -x;
y = x + TOINT - TOINT - x;
y = x + toint - toint - x;
if (y > 0)
y -= 1;
x += y;