math: fix __rem_pio2 in non-nearest rounding modes

Handle when after reduction |y| > pi/4+tiny. This happens in directed
rounding modes because the fast round to int code does not give the
nearest integer. In such cases the reduction may not be symmetric
between x and -x so e.g. cos(x)==cos(-x) may not hold (but polynomial
evaluation is not symmetric either with directed rounding so fixing
that would require more changes with bigger performance impact).

The fix only adds two predictable branches in nearest rounding mode,
simple ubenchmark does not show relevant performance regression in
nearest rounding mode.

The code could be improved: e.g reducing the medium size threshold
such that two step reduction is enough instead of three, and the
single precision case can avoid the issue by doing the round to int
differently, but this fix was kept minimal.
This commit is contained in:
Szabolcs Nagy 2020-01-18 17:55:25 +00:00 committed by Rich Felker
parent 040c1d16b4
commit b3797d3b2e
3 changed files with 41 additions and 3 deletions

View File

@ -36,6 +36,7 @@
*/
static const double
toint = 1.5/EPS,
pio4 = 0x1.921fb54442d18p-1,
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
@ -117,11 +118,23 @@ int __rem_pio2(double x, double *y)
}
if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
medium:
/* rint(x/(pi/2)), Assume round-to-nearest. */
/* rint(x/(pi/2)) */
fn = (double_t)x*invpio2 + toint - toint;
n = (int32_t)fn;
r = x - fn*pio2_1;
w = fn*pio2_1t; /* 1st round, good to 85 bits */
/* Matters with directed rounding. */
if (predict_false(r - w < -pio4)) {
n--;
fn--;
r = x - fn*pio2_1;
w = fn*pio2_1t;
} else if (predict_false(r - w > pio4)) {
n++;
fn++;
r = x - fn*pio2_1;
w = fn*pio2_1t;
}
y[0] = r - w;
u.f = y[0];
ey = u.i>>52 & 0x7ff;

View File

@ -35,6 +35,7 @@
*/
static const double
toint = 1.5/EPS,
pio4 = 0x1.921fb6p-1,
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
pio2_1 = 1.57079631090164184570e+00, /* 0x3FF921FB, 0x50000000 */
pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */
@ -50,10 +51,20 @@ int __rem_pio2f(float x, double *y)
ix = u.i & 0x7fffffff;
/* 25+53 bit pi is good enough for medium size */
if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
/* Use a specialized rint() to get fn. Assume round-to-nearest. */
/* Use a specialized rint() to get fn. */
fn = (double_t)x*invpio2 + toint - toint;
n = (int32_t)fn;
*y = x - fn*pio2_1 - fn*pio2_1t;
/* Matters with directed rounding. */
if (predict_false(*y < -pio4)) {
n--;
fn--;
*y = x - fn*pio2_1 - fn*pio2_1t;
} else if (predict_false(*y > pio4)) {
n++;
fn++;
*y = x - fn*pio2_1 - fn*pio2_1t;
}
return n;
}
if(ix>=0x7f800000) { /* x is inf or NaN */

View File

@ -44,6 +44,7 @@ pio2_1 = 1.57079632679597125389e+00, /* 0x3FF921FB, 0x54444000 */
pio2_2 = -1.07463465549783099519e-12, /* -0x12e7b967674000.0p-92 */
pio2_3 = 6.36831716351370313614e-25; /* 0x18a2e037074000.0p-133 */
static const long double
pio4 = 0x1.921fb54442d1846ap-1L,
invpio2 = 6.36619772367581343076e-01L, /* 0xa2f9836e4e44152a.0p-64 */
pio2_1t = -1.07463465549719416346e-12L, /* -0x973dcb3b399d747f.0p-103 */
pio2_2t = 6.36831716351095013979e-25L, /* 0xc51701b839a25205.0p-144 */
@ -57,6 +58,7 @@ pio2_3t = -2.75299651904407171810e-37L; /* -0xbb5bf6c7ddd660ce.0p-185 */
#define NX 5
#define NY 3
static const long double
pio4 = 0x1.921fb54442d18469898cc51701b8p-1L,
invpio2 = 6.3661977236758134307553505349005747e-01L, /* 0x145f306dc9c882a53f84eafa3ea6a.0p-113 */
pio2_1 = 1.5707963267948966192292994253909555e+00L, /* 0x1921fb54442d18469800000000000.0p-112 */
pio2_1t = 2.0222662487959507323996846200947577e-21L, /* 0x13198a2e03707344a4093822299f3.0p-181 */
@ -76,11 +78,23 @@ int __rem_pio2l(long double x, long double *y)
u.f = x;
ex = u.i.se & 0x7fff;
if (SMALL(u)) {
/* rint(x/(pi/2)), Assume round-to-nearest. */
/* rint(x/(pi/2)) */
fn = x*invpio2 + toint - toint;
n = QUOBITS(fn);
r = x-fn*pio2_1;
w = fn*pio2_1t; /* 1st round good to 102/180 bits (ld80/ld128) */
/* Matters with directed rounding. */
if (predict_false(r - w < -pio4)) {
n--;
fn--;
r = x - fn*pio2_1;
w = fn*pio2_1t;
} else if (predict_false(r - w > pio4)) {
n++;
fn++;
r = x - fn*pio2_1;
w = fn*pio2_1t;
}
y[0] = r-w;
u.f = y[0];
ey = u.i.se & 0x7fff;