mirror of git://git.musl-libc.org/musl
math: new logf
from https://github.com/ARM-software/optimized-routines, commit 04884bd04eac4b251da4026900010ea7d8850edc, with minor changes to better fit into musl. code size change: +289 bytes. benchmark on x86_64 before, after, speedup: -Os: logf rthruput: 8.40 ns/call 6.14 ns/call 1.37x logf latency: 31.79 ns/call 24.33 ns/call 1.31x -O3: logf rthruput: 8.43 ns/call 5.58 ns/call 1.51x logf latency: 32.04 ns/call 20.88 ns/call 1.53x
This commit is contained in:
parent
169fc008d8
commit
db505b794c
110
src/math/logf.c
110
src/math/logf.c
|
@ -1,69 +1,71 @@
|
|||
/* origin: FreeBSD /usr/src/lib/msun/src/e_logf.c */
|
||||
/*
|
||||
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
||||
*/
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
* Single-precision log function.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "libm.h"
|
||||
#include "logf_data.h"
|
||||
|
||||
static const float
|
||||
ln2_hi = 6.9313812256e-01, /* 0x3f317180 */
|
||||
ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */
|
||||
/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */
|
||||
Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */
|
||||
Lg2 = 0xccce13.0p-25, /* 0.40000972152 */
|
||||
Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */
|
||||
Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */
|
||||
/*
|
||||
LOGF_TABLE_BITS = 4
|
||||
LOGF_POLY_ORDER = 4
|
||||
|
||||
ULP error: 0.818 (nearest rounding.)
|
||||
Relative error: 1.957 * 2^-26 (before rounding.)
|
||||
*/
|
||||
|
||||
#define T __logf_data.tab
|
||||
#define A __logf_data.poly
|
||||
#define Ln2 __logf_data.ln2
|
||||
#define N (1 << LOGF_TABLE_BITS)
|
||||
#define OFF 0x3f330000
|
||||
|
||||
float logf(float x)
|
||||
{
|
||||
union {float f; uint32_t i;} u = {x};
|
||||
float_t hfsq,f,s,z,R,w,t1,t2,dk;
|
||||
uint32_t ix;
|
||||
int k;
|
||||
double_t z, r, r2, y, y0, invc, logc;
|
||||
uint32_t ix, iz, tmp;
|
||||
int k, i;
|
||||
|
||||
ix = u.i;
|
||||
k = 0;
|
||||
if (ix < 0x00800000 || ix>>31) { /* x < 2**-126 */
|
||||
if (ix<<1 == 0)
|
||||
return -1/(x*x); /* log(+-0)=-inf */
|
||||
if (ix>>31)
|
||||
return (x-x)/0.0f; /* log(-#) = NaN */
|
||||
/* subnormal number, scale up x */
|
||||
k -= 25;
|
||||
x *= 0x1p25f;
|
||||
u.f = x;
|
||||
ix = u.i;
|
||||
} else if (ix >= 0x7f800000) {
|
||||
return x;
|
||||
} else if (ix == 0x3f800000)
|
||||
ix = asuint(x);
|
||||
/* Fix sign of zero with downward rounding when x==1. */
|
||||
if (WANT_ROUNDING && predict_false(ix == 0x3f800000))
|
||||
return 0;
|
||||
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
|
||||
/* x < 0x1p-126 or inf or nan. */
|
||||
if (ix * 2 == 0)
|
||||
return __math_divzerof(1);
|
||||
if (ix == 0x7f800000) /* log(inf) == inf. */
|
||||
return x;
|
||||
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
|
||||
return __math_invalidf(x);
|
||||
/* x is subnormal, normalize it. */
|
||||
ix = asuint(x * 0x1p23f);
|
||||
ix -= 23 << 23;
|
||||
}
|
||||
|
||||
/* reduce x into [sqrt(2)/2, sqrt(2)] */
|
||||
ix += 0x3f800000 - 0x3f3504f3;
|
||||
k += (int)(ix>>23) - 0x7f;
|
||||
ix = (ix&0x007fffff) + 0x3f3504f3;
|
||||
u.i = ix;
|
||||
x = u.f;
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
|
||||
k = (int32_t)tmp >> 23; /* arithmetic shift */
|
||||
iz = ix - (tmp & 0x1ff << 23);
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
z = (double_t)asfloat(iz);
|
||||
|
||||
f = x - 1.0f;
|
||||
s = f/(2.0f + f);
|
||||
z = s*s;
|
||||
w = z*z;
|
||||
t1= w*(Lg2+w*Lg4);
|
||||
t2= z*(Lg1+w*Lg3);
|
||||
R = t2 + t1;
|
||||
hfsq = 0.5f*f*f;
|
||||
dk = k;
|
||||
return s*(hfsq+R) + dk*ln2_lo - hfsq + f + dk*ln2_hi;
|
||||
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
|
||||
r = z * invc - 1;
|
||||
y0 = logc + (double_t)k * Ln2;
|
||||
|
||||
/* Pipelined polynomial evaluation to approximate log1p(r). */
|
||||
r2 = r * r;
|
||||
y = A[1] * r + A[2];
|
||||
y = A[0] * r2 + y;
|
||||
y = y * r2 + (y0 + r);
|
||||
return eval_as_float(y);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Data definition for logf.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "logf_data.h"
|
||||
|
||||
const struct logf_data __logf_data = {
|
||||
.tab = {
|
||||
{ 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
|
||||
{ 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
|
||||
{ 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
|
||||
{ 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
|
||||
{ 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
|
||||
{ 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
|
||||
{ 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
|
||||
{ 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
|
||||
{ 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
|
||||
{ 0x1p+0, 0x0p+0 },
|
||||
{ 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
|
||||
{ 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
|
||||
{ 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
|
||||
{ 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
|
||||
{ 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
|
||||
{ 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
|
||||
},
|
||||
.ln2 = 0x1.62e42fefa39efp-1,
|
||||
.poly = {
|
||||
-0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2,
|
||||
}
|
||||
};
|
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#ifndef _LOGF_DATA_H
|
||||
#define _LOGF_DATA_H
|
||||
|
||||
#include <features.h>
|
||||
|
||||
#define LOGF_TABLE_BITS 4
|
||||
#define LOGF_POLY_ORDER 4
|
||||
extern hidden const struct logf_data {
|
||||
struct {
|
||||
double invc, logc;
|
||||
} tab[1 << LOGF_TABLE_BITS];
|
||||
double ln2;
|
||||
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
|
||||
} __logf_data;
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue