mirror of
https://github.com/odin-lang/Odin.git
synced 2026-04-21 22:05:20 +00:00
Remove libm dependency in core:math where possible
This commit is contained in:
@@ -14,10 +14,10 @@ Float_Class :: enum {
|
||||
Neg_Inf, // negative infinity
|
||||
}
|
||||
|
||||
TAU :: 6.28318530717958647692528676655900576
|
||||
PI :: 3.14159265358979323846264338327950288
|
||||
TAU :: 6.28318530717958647692528676655900576
|
||||
PI :: 3.14159265358979323846264338327950288
|
||||
|
||||
E :: 2.71828182845904523536
|
||||
E :: 2.71828182845904523536
|
||||
|
||||
τ :: TAU
|
||||
π :: PI
|
||||
@@ -42,6 +42,32 @@ min :: builtin.min
|
||||
max :: builtin.max
|
||||
clamp :: builtin.clamp
|
||||
|
||||
|
||||
// IS_WASM is true when the target architecture is wasm32 or wasm64p32.
@(private)
IS_WASM :: (ODIN_ARCH == .wasm32) || (ODIN_ARCH == .wasm64p32)
|
||||
|
||||
// sqrt_f16 returns the square root of x.
// Outside of WASM targets the compiler intrinsic is used directly on the f16
// value; on WASM targets the value is widened and routed through sqrt_f64.
@(require_results)
sqrt_f16 :: proc "contextless" (x: f16) -> f16 {
	when !IS_WASM {
		return intrinsics.sqrt(x)
	} else {
		wide := sqrt_f64(f64(x))
		return f16(wide)
	}
}
|
||||
// sqrt_f32 returns the square root of x.
// Outside of WASM targets the compiler intrinsic is used directly on the f32
// value; on WASM targets the value is widened and routed through sqrt_f64.
@(require_results)
sqrt_f32 :: proc "contextless" (x: f32) -> f32 {
	when !IS_WASM {
		return intrinsics.sqrt(x)
	} else {
		wide := sqrt_f64(f64(x))
		return f32(wide)
	}
}
|
||||
// sqrt_f64 returns the square root of x using the compiler's sqrt intrinsic.
@(require_results)
sqrt_f64 :: proc "contextless" (x: f64) -> f64 {
	root := intrinsics.sqrt(x)
	return root
}
|
||||
|
||||
|
||||
// Square root for little-endian f16: converts to f16, calls sqrt_f16 and converts back.
@(require_results)
sqrt_f16le :: proc "contextless" (x: f16le) -> f16le {
	return #force_inline f16le(sqrt_f16(f16(x)))
}

// Square root for big-endian f16: converts to f16, calls sqrt_f16 and converts back.
@(require_results)
sqrt_f16be :: proc "contextless" (x: f16be) -> f16be {
	return #force_inline f16be(sqrt_f16(f16(x)))
}

// Square root for little-endian f32: converts to f32, calls sqrt_f32 and converts back.
@(require_results)
sqrt_f32le :: proc "contextless" (x: f32le) -> f32le {
	return #force_inline f32le(sqrt_f32(f32(x)))
}
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
//+build js
|
||||
package math
|
||||
|
||||
import "base:intrinsics"
|
||||
|
||||
foreign import "odin_env"
|
||||
|
||||
// f64 math routines resolved from the `odin_env` foreign library.
// Each declaration is bound to the symbol given by its link_name.
@(default_calling_convention="c")
foreign odin_env {
	@(require_results, link_name="pow")
	pow_f64 :: proc(x, power: f64) -> f64 ---

	@(require_results, link_name="fmuladd")
	fmuladd_f64 :: proc(a, b, c: f64) -> f64 ---

	@(require_results, link_name="ln")
	ln_f64 :: proc(x: f64) -> f64 ---

	@(require_results, link_name="exp")
	exp_f64 :: proc(x: f64) -> f64 ---
}
|
||||
|
||||
// sqrt_f64 returns the square root of x using the compiler's sqrt intrinsic.
@(require_results)
sqrt_f64 :: proc "contextless" (x: f64) -> f64 {
	root := intrinsics.sqrt(x)
	return root
}
|
||||
|
||||
// sqrt_f16 widens x to f64, calls sqrt_f64 and narrows the result to f16.
@(require_results)
sqrt_f16 :: proc "c" (x: f16) -> f16 {
	wide := sqrt_f64(f64(x))
	return f16(wide)
}

// pow_f16 widens both operands to f64, calls pow_f64 and narrows the result to f16.
@(require_results)
pow_f16 :: proc "c" (x, power: f16) -> f16 {
	wide := pow_f64(f64(x), f64(power))
	return f16(wide)
}
|
||||
// fmuladd_f16 computes a*b + c for f16 operands.
// The operands are widened to f64, passed to fmuladd_f64, and the result is
// narrowed back to f16.
@(require_results)
fmuladd_f16 :: proc "c" (a, b, c: f16) -> f16 {
	// The second factor must be `b`; passing `a` twice would yield a*a + c.
	return f16(fmuladd_f64(f64(a), f64(b), f64(c)))
}
|
||||
// ln_f16 widens x to f64, calls ln_f64 and narrows the result to f16.
@(require_results)
ln_f16 :: proc "c" (x: f16) -> f16 {
	wide := ln_f64(f64(x))
	return f16(wide)
}

// exp_f16 widens x to f64, calls exp_f64 and narrows the result to f16.
@(require_results)
exp_f16 :: proc "c" (x: f16) -> f16 {
	wide := exp_f64(f64(x))
	return f16(wide)
}
|
||||
|
||||
// sqrt_f32 widens x to f64, calls sqrt_f64 and narrows the result to f32.
@(require_results)
sqrt_f32 :: proc "c" (x: f32) -> f32 {
	wide := sqrt_f64(f64(x))
	return f32(wide)
}

// pow_f32 widens both operands to f64, calls pow_f64 and narrows the result to f32.
@(require_results)
pow_f32 :: proc "c" (x, power: f32) -> f32 {
	wide := pow_f64(f64(x), f64(power))
	return f32(wide)
}
|
||||
// fmuladd_f32 computes a*b + c for f32 operands.
// The operands are widened to f64, passed to fmuladd_f64, and the result is
// narrowed back to f32.
@(require_results)
fmuladd_f32 :: proc "c" (a, b, c: f32) -> f32 {
	// The second factor must be `b`; passing `a` twice would yield a*a + c.
	return f32(fmuladd_f64(f64(a), f64(b), f64(c)))
}
|
||||
// ln_f32 widens x to f64, calls ln_f64 and narrows the result to f32.
@(require_results)
ln_f32 :: proc "c" (x: f32) -> f32 {
	wide := ln_f64(f64(x))
	return f32(wide)
}

// exp_f32 widens x to f64, calls exp_f64 and narrows the result to f32.
@(require_results)
exp_f32 :: proc "c" (x: f32) -> f32 {
	wide := exp_f64(f64(x))
	return f32(wide)
}
|
||||
|
||||
// Endian-specific ln wrappers.
// Each one converts its argument to f64, calls ln_f64 and converts the result
// back to the argument's own type.

@(require_results)
ln_f16le :: proc "contextless" (x: f16le) -> f16le {
	return #force_inline f16le(ln_f64(f64(x)))
}

@(require_results)
ln_f16be :: proc "contextless" (x: f16be) -> f16be {
	return #force_inline f16be(ln_f64(f64(x)))
}

@(require_results)
ln_f32le :: proc "contextless" (x: f32le) -> f32le {
	return #force_inline f32le(ln_f64(f64(x)))
}

@(require_results)
ln_f32be :: proc "contextless" (x: f32be) -> f32be {
	return #force_inline f32be(ln_f64(f64(x)))
}

@(require_results)
ln_f64le :: proc "contextless" (x: f64le) -> f64le {
	return #force_inline f64le(ln_f64(f64(x)))
}

@(require_results)
ln_f64be :: proc "contextless" (x: f64be) -> f64be {
	return #force_inline f64be(ln_f64(f64(x)))
}
|
||||
// ln is the overload set that groups the ln_* procedures for every float
// width (16, 32, 64 bits) and byte order (native, little-endian, big-endian).
ln :: proc{
	ln_f16,
	ln_f16le,
	ln_f16be,
	ln_f32,
	ln_f32le,
	ln_f32be,
	ln_f64,
	ln_f64le,
	ln_f64be,
}
|
||||
218
core/math/math_fmuladd.odin
Normal file
218
core/math/math_fmuladd.odin
Normal file
@@ -0,0 +1,218 @@
|
||||
package math
|
||||
|
||||
import "base:intrinsics"
|
||||
_ :: intrinsics
|
||||
|
||||
// fmuladd_f16 computes a*b + c for f16 operands.
// Outside of WASM targets it forwards to the `llvm.fmuladd.f16` intrinsic;
// on WASM targets the operands are widened and routed through fmuladd_f64.
@(require_results)
fmuladd_f16 :: proc "contextless" (a, b, c: f16) -> f16 {
	when !IS_WASM {
		foreign _ {
			@(require_results, link_name="llvm.fmuladd.f16")
			_fmuladd_f16 :: proc "none" (a, b, c: f16) -> f16 ---
		}

		return _fmuladd_f16(a, b, c)
	} else {
		wide := fmuladd_f64(f64(a), f64(b), f64(c))
		return f16(wide)
	}
}
|
||||
// fmuladd_f32 computes a*b + c for f32 operands.
// Outside of WASM targets it forwards to the `llvm.fmuladd.f32` intrinsic;
// on WASM targets the operands are widened and routed through fmuladd_f64.
@(require_results)
fmuladd_f32 :: proc "contextless" (a, b, c: f32) -> f32 {
	when !IS_WASM {
		foreign _ {
			@(require_results, link_name="llvm.fmuladd.f32")
			_fmuladd_f32 :: proc "none" (a, b, c: f32) -> f32 ---
		}

		return _fmuladd_f32(a, b, c)
	} else {
		wide := fmuladd_f64(f64(a), f64(b), f64(c))
		return f32(wide)
	}
}
|
||||
// fmuladd_f64 computes a*b + c for f64 operands.
// Outside of WASM targets it forwards to the `llvm.fmuladd.f64` intrinsic;
// on WASM targets it uses the software routine fmuladd_slow_f64.
@(require_results)
fmuladd_f64 :: proc "contextless" (a, b, c: f64) -> f64 {
	when !IS_WASM {
		foreign _ {
			@(require_results, link_name="llvm.fmuladd.f64")
			_fmuladd_f64 :: proc "none" (a, b, c: f64) -> f64 ---
		}

		return _fmuladd_f64(a, b, c)
	} else {
		return #force_inline fmuladd_slow_f64(a, b, c)
	}
}
|
||||
|
||||
|
||||
// fmuladd_slow_f64 evaluates x*y + z in software on the raw f64 bit patterns.
// The product is kept as a two-word (hi:lo) significand, z is aligned to it,
// the two are added or subtracted, and the result is rounded once
// (ties to even) before being reassembled into an f64.
@(require_results)
fmuladd_slow_f64 :: proc "contextless" (x, y, z: f64) -> f64 {
	// split breaks a bit pattern into sign, exponent field and significand.
	@(require_results)
	split :: proc "contextless" (b: u64) -> (sign: u32, exp: i32, mantissa: u64) {
		MASK      :: 0x7FF
		FRAC_MASK :: 1<<52 - 1

		sign     = u32(b >> 63)
		exp      = i32(b>>52) & MASK
		mantissa = b & FRAC_MASK

		if exp != 0 {
			// Non-zero exponent field: set the implicit bit 52.
			mantissa |= 1<<52
			return
		}

		// Zero exponent field: shift the significand up and compensate in the exponent.
		shift := uint(intrinsics.count_leading_zeros(mantissa) - 11)
		mantissa <<= shift
		exp = 1 - i32(shift)
		return
	}

	// mul_u64 returns the full 128-bit product of x and y as (hi, lo).
	@(require_results)
	mul_u64 :: proc "contextless" (x, y: u64) -> (hi, lo: u64) {
		wide := u128(x) * u128(y)
		hi = u64(wide >> 64)
		lo = u64(wide)
		return
	}

	// add_u64 returns x + y + carry and a carry-out of 1 if either addition overflowed.
	@(require_results)
	add_u64 :: proc "contextless" (x, y, carry: u64) -> (sum, carry_out: u64) {
		first, second: bool
		sum, first  = intrinsics.overflow_add(x, y)
		sum, second = intrinsics.overflow_add(sum, carry)
		carry_out = u64(first | second)
		return
	}

	// sub_u64 returns x - y - borrow and a borrow-out of 1 if either subtraction overflowed.
	@(require_results)
	sub_u64 :: proc "contextless" (x, y, borrow: u64) -> (diff, borrow_out: u64) {
		first, second: bool
		diff, first  = intrinsics.overflow_sub(x, y)
		diff, second = intrinsics.overflow_sub(diff, borrow)
		borrow_out = u64(first | second)
		return
	}

	// nonzero returns 1 when x != 0, otherwise 0.
	@(require_results)
	nonzero :: proc "contextless" (x: u64) -> u64 {
		return 0 if x == 0 else 1
	}

	// zero returns 1 when x == 0, otherwise 0.
	@(require_results)
	zero :: proc "contextless" (x: u64) -> u64 {
		return 0 if x != 0 else 1
	}

	// shl shifts the two-word value (u1:u2) left by n bits.
	@(require_results)
	shl :: proc "contextless" (u1, u2: u64, n: uint) -> (r1, r2: u64) {
		r2 = u2<<n
		r1 = u1<<n | u2>>(64-n) | u2<<(n-64)
		return
	}

	// shr shifts the two-word value (u1:u2) right by n bits.
	@(require_results)
	shr :: proc "contextless" (u1, u2: u64, n: uint) -> (r1, r2: u64) {
		r1 = u1>>n
		r2 = u2>>n | u1<<(64-n) | u1>>(n-64)
		return
	}

	// lz counts the leading zero bits of the two-word value (u1:u2).
	@(require_results)
	lz :: proc "contextless" (u1, u2: u64) -> (l: i32) {
		hi_zeros := i32(intrinsics.count_leading_zeros(u1))
		if hi_zeros != 64 {
			return hi_zeros
		}
		return hi_zeros + i32(intrinsics.count_leading_zeros(u2))
	}

	// shrcompress shifts (u1:u2) right by n bits and ORs a 1 into the lowest
	// result bit whenever any shifted-out bit was set.
	@(require_results)
	shrcompress :: proc "contextless" (u1, u2: u64, n: uint) -> (r1, r2: u64) {
		if n == 0 {
			return u1, u2
		}
		if n == 64 {
			return 0, u1 | nonzero(u2)
		}
		if n >= 128 {
			return 0, nonzero(u1 | u2)
		}

		r1, r2 = shr(u1, u2, n)
		if n < 64 {
			r2 |= nonzero(u2 & (1<<n - 1))
		} else {
			r2 |= nonzero(u1&(1<<(n-64)-1) | u2)
		}
		return
	}

	UVINF :: 0x7ff0_0000_0000_0000
	BIAS  :: 1023

	bx := transmute(u64)x
	by := transmute(u64)y
	bz := transmute(u64)z

	// A zero operand, or x/y with an all-ones exponent field: use the plain expression.
	if x == 0 || y == 0 || z == 0 || bx&UVINF == UVINF || by&UVINF == UVINF {
		return x*y + z
	}

	// z with an all-ones exponent field is returned unchanged.
	if bz&UVINF == UVINF {
		return z
	}

	xs, xe, xm := split(bx)
	ys, ye, ym := split(by)
	zs, ze, zm := split(bz)

	// Product exponent.
	pe := xe + ye - BIAS + 1

	// Two-word product significand (pm1:pm2) and z's significand in the same layout.
	pm1, pm2 := mul_u64(xm<<10, ym<<11)
	zm1, zm2 := zm<<10, u64(0)
	ps := xs ~ ys // product sign

	// If bit 62 of the high word is clear, shift left by one and adjust the exponent.
	norm_shift := uint((~pm1 >> 62) & 1)
	pm1, pm2 = shl(pm1, pm2, norm_shift)
	pe -= i32(norm_shift)

	if pe < ze || pe == ze && pm1 < zm1 {
		// Swap addition operands so |p| >= |z|
		ps, pe, pm1, pm2, zs, ze, zm1, zm2 = zs, ze, zm1, zm2, ps, pe, pm1, pm2
	}

	// Opposite signs with identical exponent and significand cancel exactly.
	if ps != zs && pe == ze && pm1 == zm1 && pm2 == zm2 {
		return 0
	}

	// Align z's significand with the product's exponent.
	zm1, zm2 = shrcompress(zm1, zm2, uint(pe-ze))

	// Compute resulting significands, normalizing if necessary.
	sig, carry: u64
	if ps == zs {
		// Adding (pm1:pm2) + (zm1:zm2)
		pm2, carry = add_u64(pm2, zm2, 0)
		pm1, _ = add_u64(pm1, zm1, carry)
		pe -= i32(~pm1 >> 63)
		pm1, sig = shrcompress(pm1, pm2, uint(64+pm1>>63))
	} else {
		// Subtracting (pm1:pm2) - (zm1:zm2)
		pm2, carry = sub_u64(pm2, zm2, 0)
		pm1, _ = sub_u64(pm1, zm1, carry)
		lead_zeros := lz(pm1, pm2)
		pe -= lead_zeros
		sig, pm2 = shl(pm1, pm2, uint(lead_zeros-1))
		sig |= nonzero(pm2)
	}

	// Round and break ties to even
	if pe > 1022+BIAS || pe == 1022+BIAS && (sig+1<<9)>>63 == 1 {
		// rounded value overflows exponent range
		return transmute(f64)(u64(ps)<<63 | UVINF)
	}
	if pe < 0 {
		shift_by := uint(-pe)
		sig = sig>>shift_by | nonzero(sig&(1<<shift_by-1))
		pe = 0
	}
	sig = ((sig + 1<<9) >> 10) & ~zero((sig&(1<<10-1))~1<<9)
	pe &= -i32(nonzero(sig))
	return transmute(f64)(u64(ps)<<63 + u64(pe)<<52 + sig)
}
|
||||
@@ -1,87 +1,5 @@
|
||||
//+build !js
|
||||
package math
|
||||
|
||||
import "base:intrinsics"
|
||||
|
||||
// File-private bindings to the LLVM pow, fmuladd and exp intrinsics for
// f16, f32 and f64.
@(private="file", default_calling_convention="none")
foreign _ {
	@(require_results, link_name="llvm.pow.f16")
	_pow_f16 :: proc(x, power: f16) -> f16 ---
	@(require_results, link_name="llvm.pow.f32")
	_pow_f32 :: proc(x, power: f32) -> f32 ---
	@(require_results, link_name="llvm.pow.f64")
	_pow_f64 :: proc(x, power: f64) -> f64 ---

	@(require_results, link_name="llvm.fmuladd.f16")
	_fmuladd_f16 :: proc(a, b, c: f16) -> f16 ---
	@(require_results, link_name="llvm.fmuladd.f32")
	_fmuladd_f32 :: proc(a, b, c: f32) -> f32 ---
	@(require_results, link_name="llvm.fmuladd.f64")
	_fmuladd_f64 :: proc(a, b, c: f64) -> f64 ---

	@(require_results, link_name="llvm.exp.f16")
	_exp_f16 :: proc(x: f16) -> f16 ---
	@(require_results, link_name="llvm.exp.f32")
	_exp_f32 :: proc(x: f32) -> f32 ---
	@(require_results, link_name="llvm.exp.f64")
	_exp_f64 :: proc(x: f64) -> f64 ---
}
|
||||
|
||||
|
||||
// pow_f16 forwards to the f16 pow binding `_pow_f16`.
@(require_results)
pow_f16 :: proc "contextless" (x, power: f16) -> f16 {
	result := _pow_f16(x, power)
	return result
}

// pow_f32 forwards to the f32 pow binding `_pow_f32`.
@(require_results)
pow_f32 :: proc "contextless" (x, power: f32) -> f32 {
	result := _pow_f32(x, power)
	return result
}

// pow_f64 forwards to the f64 pow binding `_pow_f64`.
@(require_results)
pow_f64 :: proc "contextless" (x, power: f64) -> f64 {
	result := _pow_f64(x, power)
	return result
}
|
||||
|
||||
// fmuladd_f16 forwards to the f16 fmuladd binding `_fmuladd_f16`.
@(require_results)
fmuladd_f16 :: proc "contextless" (a, b, c: f16) -> f16 {
	result := _fmuladd_f16(a, b, c)
	return result
}

// fmuladd_f32 forwards to the f32 fmuladd binding `_fmuladd_f32`.
@(require_results)
fmuladd_f32 :: proc "contextless" (a, b, c: f32) -> f32 {
	result := _fmuladd_f32(a, b, c)
	return result
}

// fmuladd_f64 forwards to the f64 fmuladd binding `_fmuladd_f64`.
@(require_results)
fmuladd_f64 :: proc "contextless" (a, b, c: f64) -> f64 {
	result := _fmuladd_f64(a, b, c)
	return result
}
|
||||
|
||||
// exp_f16 forwards to the f16 exp binding `_exp_f16`.
@(require_results)
exp_f16 :: proc "contextless" (x: f16) -> f16 {
	result := _exp_f16(x)
	return result
}

// exp_f32 forwards to the f32 exp binding `_exp_f32`.
@(require_results)
exp_f32 :: proc "contextless" (x: f32) -> f32 {
	result := _exp_f32(x)
	return result
}

// exp_f64 forwards to the f64 exp binding `_exp_f64`.
@(require_results)
exp_f64 :: proc "contextless" (x: f64) -> f64 {
	result := _exp_f64(x)
	return result
}
|
||||
|
||||
|
||||
// sqrt_f16 returns the square root of x using the compiler's sqrt intrinsic.
@(require_results)
sqrt_f16 :: proc "contextless" (x: f16) -> f16 {
	root := intrinsics.sqrt(x)
	return root
}

// sqrt_f32 returns the square root of x using the compiler's sqrt intrinsic.
@(require_results)
sqrt_f32 :: proc "contextless" (x: f32) -> f32 {
	root := intrinsics.sqrt(x)
	return root
}

// sqrt_f64 returns the square root of x using the compiler's sqrt intrinsic.
@(require_results)
sqrt_f64 :: proc "contextless" (x: f64) -> f64 {
	root := intrinsics.sqrt(x)
	return root
}
|
||||
|
||||
|
||||
|
||||
@(require_results)
|
||||
ln_f64 :: proc "contextless" (x: f64) -> f64 {
|
||||
@@ -145,7 +63,7 @@ ln_f64 :: proc "contextless" (x: f64) -> f64 {
|
||||
// constants. The decimal values may be used, provided that the
|
||||
// compiler will convert from decimal to binary accurately enough
|
||||
// to produce the hexadecimal values shown.
|
||||
|
||||
|
||||
LN2_HI :: 0h3fe62e42_fee00000 // 6.93147180369123816490e-01
|
||||
LN2_LO :: 0h3dea39ef_35793c76 // 1.90821492927058770002e-10
|
||||
L1 :: 0h3fe55555_55555593 // 6.666666666666735130e-01
|
||||
@@ -155,7 +73,7 @@ ln_f64 :: proc "contextless" (x: f64) -> f64 {
|
||||
L5 :: 0h3fc74664_96cb03de // 1.818357216161805012e-01
|
||||
L6 :: 0h3fc39a09_d078c69f // 1.531383769920937332e-01
|
||||
L7 :: 0h3fc2f112_df3e5244 // 1.479819860511658591e-01
|
||||
|
||||
|
||||
switch {
|
||||
case is_nan(x) || is_inf(x, 1):
|
||||
return x
|
||||
210
core/math/math_pow.odin
Normal file
210
core/math/math_pow.odin
Normal file
@@ -0,0 +1,210 @@
|
||||
package math
|
||||
|
||||
|
||||
// pow_f64 returns x**y, x raised to the power y.
//
// Special cases, checked in this order:
//
//	pow(x, ±0)     = 1    for any x
//	pow(1, y)      = 1    for any y
//	pow(x, 1)      = x    for any x
//	pow(NaN, y)    = NaN
//	pow(x, NaN)    = NaN
//	pow(±0, y)     = ±Inf for y an odd integer < 0
//	pow(±0, -Inf)  = +Inf
//	pow(±0, +Inf)  = +0
//	pow(±0, y)     = +Inf for finite y < 0 and not an odd integer
//	pow(±0, y)     = ±0   for y an odd integer > 0
//	pow(±0, y)     = +0   for finite y > 0 and not an odd integer
//	pow(-1, ±Inf)  = 1
//	pow(x, +Inf)   = +Inf for |x| > 1
//	pow(x, -Inf)   = +0   for |x| > 1
//	pow(x, +Inf)   = +0   for |x| < 1
//	pow(x, -Inf)   = +Inf for |x| < 1
//	pow(+Inf, y)   = +Inf for y > 0
//	pow(+Inf, y)   = +0   for y < 0
//	pow(-Inf, y)   = pow(-0, -y)
//	pow(x, y)      = NaN  for finite x < 0 and finite non-integer y
//
// Special cases taken from FreeBSD's /usr/src/lib/msun/src/e_pow.c
// updated by IEEE Std. 754-2008 "Section 9.2.1 Special values".
@(require_results)
pow_f64 :: proc "contextless" (x, y: f64) -> f64 {
	// is_odd_int reports whether x is an odd integer.
	// Magnitudes of 2**53 and above are always reported as not odd.
	is_odd_int :: proc "contextless" (x: f64) -> bool {
		if abs(x) < (1<<53) {
			whole, frac := modf(x)
			return frac == 0 && (i64(whole)&1 == 1)
		}
		return false
	}

	switch {
	case y == 0 || x == 1:
		return 1.0
	case y == 1:
		return x
	case is_nan(x) || is_nan(y):
		return nan_f64()
	case x == 0:
		if y < 0 {
			if signbit(x) && is_odd_int(y) {
				return inf_f64(-1)
			}
			return inf_f64(1)
		}
		if y > 0 {
			if signbit(x) && is_odd_int(y) {
				return x
			}
			return 0.0
		}
	case is_inf(y, 0):
		if x == -1 {
			return 1.0
		}
		if (abs(x) < 1) == is_inf(y, 1) {
			return 0.0
		}
		return inf_f64(1)
	case is_inf(x, 0):
		if is_inf(x, -1) {
			// pow(-0, -y)
			return pow_f64(1.0/x, -y)
		}
		if y < 0 {
			return 0.0
		}
		if y > 0 {
			return inf_f64(1)
		}
	case y == 0.5:
		return sqrt_f64(x)
	case y == -0.5:
		return 1.0 / sqrt_f64(x)
	}

	// Split |y| into its integer and fractional parts.
	y_int, y_frac := modf(abs(y))
	if y_frac != 0 && x < 0 {
		return nan_f64()
	}
	if y_int >= 1<<63 {
		// y_int is a large even int that will lead to overflow (or underflow to 0)
		// for all x except -1 (x == 1 was handled earlier)
		if x == -1 {
			return 1.0
		}
		if (abs(x) < 1) == (y > 0) {
			return 0.0
		}
		return inf_f64(1)
	}

	// ans = mant * 2**expo (= 1 for now).
	mant: f64 = 1
	expo: int = 0

	// ans *= x**y_frac
	if y_frac != 0 {
		if y_frac > 0.5 {
			y_frac -= 1
			y_int += 1
		}
		mant = exp(y_frac * ln(x))
	}

	// ans *= x**y_int
	// by multiplying in successive squarings
	// of x according to bits of y_int.
	// accumulate powers of two into expo.
	base_m, base_e := frexp(x)
	bits := i64(y_int)
	for bits != 0 {
		if base_e < -1<<12 || 1<<12 < base_e {
			// catch base_e before it overflows the left shift below
			// Since bits != 0 it has at least one bit still set, so expo will accumulate base_e
			// on at least one more iteration, expo += base_e is a lower bound on expo
			// the lower bound on expo exceeds the size of a f64 exp
			// so the final call to ldexp will produce under/overflow (0/Inf)
			expo += base_e
			break
		}
		if bits&1 == 1 {
			mant *= base_m
			expo += base_e
		}
		base_m *= base_m
		base_e <<= 1
		if base_m < .5 {
			base_m += base_m
			base_e -= 1
		}
		bits >>= 1
	}

	// ans = mant*2**expo
	// if y < 0 { ans = 1 / ans }
	// but in the opposite order
	if y < 0 {
		mant = 1 / mant
		expo = -expo
	}
	return ldexp(mant, expo)
}
|
||||
|
||||
|
||||
// pow_f16 widens both operands to f64, calls pow_f64 and narrows the result to f16.
@(require_results)
pow_f16 :: proc "contextless" (x, power: f16) -> f16 {
	wide := pow_f64(f64(x), f64(power))
	return f16(wide)
}

// pow_f32 widens both operands to f64, calls pow_f64 and narrows the result to f32.
@(require_results)
pow_f32 :: proc "contextless" (x, power: f32) -> f32 {
	wide := pow_f64(f64(x), f64(power))
	return f32(wide)
}
|
||||
|
||||
|
||||
|
||||
// exp_f64 returns e**x, the base-e exponential of x.
//
// Special cases handled explicitly:
//
//	exp(NaN)  = NaN
//	exp(+Inf) = +Inf
//	exp(-Inf) = 0
//
// Arguments greater than OVERFLOW return +Inf, arguments less than UNDERFLOW
// return 0, and arguments with |x| < 2**-28 return 1 + x.
//
// Marked @(require_results) like exp_f16/exp_f32 and the other f64 routines
// in this package, so a discarded result is reported by the compiler.
@(require_results)
exp_f64 :: proc "contextless" (x: f64) -> f64 {
	LN2_HI :: 6.93147180369123816490e-01
	LN2_LO :: 1.90821492927058770002e-10
	LOG2_E :: 1.44269504088896338700e+00

	OVERFLOW  :: 7.09782712893383973096e+02
	UNDERFLOW :: -7.45133219101941108420e+02
	NEAR_ZERO :: 1.0 / (1 << 28) // 2**-28

	// special cases
	switch {
	case is_nan(x) || is_inf(x, 1):
		return x
	case is_inf(x, -1):
		return 0
	case x > OVERFLOW:
		return inf_f64(1)
	case x < UNDERFLOW:
		return 0
	case -NEAR_ZERO < x && x < NEAR_ZERO:
		return 1 + x
	}

	// reduce; computed as r = hi - lo for extra precision.
	// k is x*LOG2_E rounded by adding ±0.5 before the integer conversion.
	k: int
	switch {
	case x < 0:
		k = int(LOG2_E*x - 0.5)
	case x > 0:
		k = int(LOG2_E*x + 0.5)
	}
	hi := x - f64(k)*LN2_HI
	lo := f64(k) * LN2_LO

	// Polynomial coefficients, given as exact bit patterns.
	P1 :: 0h3FC5555555555555 //  1.66666666666666657415e-01
	P2 :: 0hBF66C16C16BEBD93 // -2.77777777770155933842e-03
	P3 :: 0h3F11566AAF25DE2C //  6.61375632143793436117e-05
	P4 :: 0hBEBBBD41C5D26BF1 // -1.65339022054652515390e-06
	P5 :: 0h3E66376972BEA4D0 //  4.13813679705723846039e-08

	r := hi - lo
	t := r * r
	c := r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))))
	y := 1 - ((lo - (r*c)/(2-c)) - hi)
	// Scale the reduced result by 2**k.
	return ldexp(y, k)
}
|
||||
|
||||
// exp_f16 widens x to f64, calls exp_f64 and narrows the result to f16.
@(require_results)
exp_f16 :: proc "contextless" (x: f16) -> f16 {
	wide := exp_f64(f64(x))
	return f16(wide)
}

// exp_f32 widens x to f64, calls exp_f64 and narrows the result to f32.
@(require_results)
exp_f32 :: proc "contextless" (x: f32) -> f32 {
	wide := exp_f64(f64(x))
	return f32(wide)
}
|
||||
|
||||
Reference in New Issue
Block a user