From 0e2b7554c7f58ea34abfe47fa3b26b9c8c8388ce Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 2 Jun 2024 22:45:25 +0100 Subject: [PATCH] Implement `sin` and `cos` in native Odin --- core/math/math_basic.odin | 39 ---------- core/math/math_basic_js.odin | 8 -- core/math/math_sincos.odin | 139 ++++++++++++++++++++++++++++++++--- 3 files changed, 129 insertions(+), 57 deletions(-) diff --git a/core/math/math_basic.odin b/core/math/math_basic.odin index 041efd272..2c73232cc 100644 --- a/core/math/math_basic.odin +++ b/core/math/math_basic.odin @@ -5,20 +5,6 @@ import "base:intrinsics" @(default_calling_convention="none", private="file") foreign _ { - @(link_name="llvm.sin.f16", require_results) - _sin_f16 :: proc(θ: f16) -> f16 --- - @(link_name="llvm.sin.f32", require_results) - _sin_f32 :: proc(θ: f32) -> f32 --- - @(link_name="llvm.sin.f64", require_results) - _sin_f64 :: proc(θ: f64) -> f64 --- - - @(link_name="llvm.cos.f16", require_results) - _cos_f16 :: proc(θ: f16) -> f16 --- - @(link_name="llvm.cos.f32", require_results) - _cos_f32 :: proc(θ: f32) -> f32 --- - @(link_name="llvm.cos.f64", require_results) - _cos_f64 :: proc(θ: f64) -> f64 --- - @(link_name="llvm.pow.f16", require_results) _pow_f16 :: proc(x, power: f16) -> f16 --- @(link_name="llvm.pow.f32", require_results) @@ -41,31 +27,6 @@ foreign _ { _exp_f64 :: proc(x: f64) -> f64 --- } -@(require_results) -sin_f16 :: proc "contextless" (θ: f16) -> f16 { - return _sin_f16(θ) -} -@(require_results) -sin_f32 :: proc "contextless" (θ: f32) -> f32 { - return _sin_f32(θ) -} -@(require_results) -sin_f64 :: proc "contextless" (θ: f64) -> f64 { - return _sin_f64(θ) -} - -@(require_results) -cos_f16 :: proc "contextless" (θ: f16) -> f16 { - return _cos_f16(θ) -} -@(require_results) -cos_f32 :: proc "contextless" (θ: f32) -> f32 { - return _cos_f32(θ) -} -@(require_results) -cos_f64 :: proc "contextless" (θ: f64) -> f64 { - return _cos_f64(θ) -} @(require_results) pow_f16 :: proc "contextless" (x, power: f16) -> f16 { diff --git a/core/math/math_basic_js.odin b/core/math/math_basic_js.odin index 5b9adabcd..df68da1b8 100644 --- a/core/math/math_basic_js.odin +++ b/core/math/math_basic_js.odin @@ -7,10 +7,6 @@ foreign import "odin_env" @(default_calling_convention="c") foreign odin_env { - @(link_name="sin", require_results) - sin_f64 :: proc(θ: f64) -> f64 --- - @(link_name="cos", require_results) - cos_f64 :: proc(θ: f64) -> f64 --- @(link_name="pow", require_results) pow_f64 :: proc(x, power: f64) -> f64 --- @(link_name="fmuladd", require_results) @@ -27,16 +23,12 @@ sqrt_f64 :: proc "contextless" (x: f64) -> f64 { } @(require_results) sqrt_f16 :: proc "c" (x: f16) -> f16 { return f16(sqrt_f64(f64(x))) } -@(require_results) sin_f16 :: proc "c" (θ: f16) -> f16 { return f16(sin_f64(f64(θ))) } -@(require_results) cos_f16 :: proc "c" (θ: f16) -> f16 { return f16(cos_f64(f64(θ))) } @(require_results) pow_f16 :: proc "c" (x, power: f16) -> f16 { return f16(pow_f64(f64(x), f64(power))) } @(require_results) fmuladd_f16 :: proc "c" (a, b, c: f16) -> f16 { return f16(fmuladd_f64(f64(a), f64(a), f64(c))) } @(require_results) ln_f16 :: proc "c" (x: f16) -> f16 { return f16(ln_f64(f64(x))) } @(require_results) exp_f16 :: proc "c" (x: f16) -> f16 { return f16(exp_f64(f64(x))) } @(require_results) sqrt_f32 :: proc "c" (x: f32) -> f32 { return f32(sqrt_f64(f64(x))) } -@(require_results) sin_f32 :: proc "c" (θ: f32) -> f32 { return f32(sin_f64(f64(θ))) } -@(require_results) cos_f32 :: proc "c" (θ: f32) -> f32 { return f32(cos_f64(f64(θ))) } @(require_results) pow_f32 :: proc "c" (x, power: f32) -> f32 { return f32(pow_f64(f64(x), f64(power))) } @(require_results) fmuladd_f32 :: proc "c" (a, b, c: f32) -> f32 { return f32(fmuladd_f64(f64(a), f64(a), f64(c))) } @(require_results) ln_f32 :: proc "c" (x: f32) -> f32 { return f32(ln_f64(f64(x))) } diff --git a/core/math/math_sincos.odin b/core/math/math_sincos.odin index 578876ac5..1d4707894 100644 --- a/core/math/math_sincos.odin +++ b/core/math/math_sincos.odin @@ -89,48 +89,58 @@ sincos :: proc{ sincos_f64, sincos_f64le, sincos_f64be, } +@(require_results) sincos_f16 :: proc "contextless" (x: f16) -> (sin, cos: f16) #no_bounds_check { s, c := sincos_f64(f64(x)) return f16(s), f16(c) } +@(require_results) sincos_f16le :: proc "contextless" (x: f16le) -> (sin, cos: f16le) #no_bounds_check { s, c := sincos_f64(f64(x)) return f16le(s), f16le(c) } +@(require_results) sincos_f16be :: proc "contextless" (x: f16be) -> (sin, cos: f16be) #no_bounds_check { s, c := sincos_f64(f64(x)) return f16be(s), f16be(c) } +@(require_results) sincos_f32 :: proc "contextless" (x: f32) -> (sin, cos: f32) #no_bounds_check { s, c := sincos_f64(f64(x)) return f32(s), f32(c) } +@(require_results) sincos_f32le :: proc "contextless" (x: f32le) -> (sin, cos: f32le) #no_bounds_check { s, c := sincos_f64(f64(x)) return f32le(s), f32le(c) } +@(require_results) sincos_f32be :: proc "contextless" (x: f32be) -> (sin, cos: f32be) #no_bounds_check { s, c := sincos_f64(f64(x)) return f32be(s), f32be(c) } +@(require_results) sincos_f64le :: proc "contextless" (x: f64le) -> (sin, cos: f64le) #no_bounds_check { s, c := sincos_f64(f64(x)) return f64le(s), f64le(c) } +@(require_results) sincos_f64be :: proc "contextless" (x: f64be) -> (sin, cos: f64be) #no_bounds_check { s, c := sincos_f64(f64(x)) return f64be(s), f64be(c) } + +@(private="file") PI4A :: 0h3fe921fb40000000 // 7.85398125648498535156e-1 PI/4 split into three parts +@(private="file") PI4B :: 0h3e64442d00000000 // 3.77489470793079817668e-8 +@(private="file") PI4C :: 0h3ce8469898cc5170 // 2.69515142907905952645e-15 + +@(require_results) sincos_f64 :: proc "contextless" (x: f64) -> (sin, cos: f64) #no_bounds_check { x := x - PI4A :: 0h3fe921fb40000000 // 7.85398125648498535156e-1 PI/4 split into three parts - PI4B :: 0h3e64442d00000000 // 3.77489470793079817668e-8 - PI4C :: 0h3ce8469898cc5170 // 2.69515142907905952645e-15 - // special cases switch { case x == 0: @@ -189,12 +199,12 @@ sincos_f64 :: proc "contextless" (x: f64) -> (sin, cos: f64) #no_bounds_check { // sin coefficients @(private="file") _sin := [?]f64{ - 0h3de5d8fd1fd19ccd, // 1.58962301576546568060e-10 - 0hbe5ae5e5a9291f5d, // -2.50507477628578072866e-8 - 0h3ec71de3567d48a1, // 2.75573136213857245213e-6 - 0hbf2a01a019bfdf03, // -1.98412698295895385996e-4 - 0h3f8111111110f7d0, // 8.33333333332211858878e-3 - 0hbfc5555555555548, // -1.66666666666666307295e-1 + 0h3de5d8fd1fd19ccd, // 1.58962301576546568060e-10 + 0hbe5ae5e5a9291f5d, // -2.50507477628578072866e-8 + 0h3ec71de3567d48a1, // 2.75573136213857245213e-6 + 0hbf2a01a019bfdf03, // -1.98412698295895385996e-4 + 0h3f8111111110f7d0, // 8.33333333332211858878e-3 + 0hbfc5555555555548, // -1.66666666666666307295e-1 } // cos coefficients @@ -229,6 +239,7 @@ REDUCE_THRESHOLD :: 1 << 29 // "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit" // K. C. Ng et al, March 24, 1992 // The simulated multi-precision calculation of x*B uses 64-bit integer arithmetic. +@(require_results) _trig_reduce_f64 :: proc "contextless" (x: f64) -> (j: u64, z: f64) #no_bounds_check { // bd_pi4 is the binary digits of 4/pi as a u64 array, // that is, 4/pi = Sum bd_pi4[i]*2^(-64*i) @@ -306,3 +317,111 @@ _trig_reduce_f64 :: proc "contextless" (x: f64) -> (j: u64, z: f64) #no_bounds_c // Multiply the fractional part by pi/4. return j, z * PI4 } + + +@(require_results) +cos_f64 :: proc "contextless" (x: f64) -> f64 #no_bounds_check { + x := x + switch { + case is_nan(x) || is_inf(x, 0): + return nan_f64() + } + + // make argument positive + sign := false + x = abs(x) + + j: u64 + y, z: f64 + if x >= REDUCE_THRESHOLD { + j, z = _trig_reduce_f64(x) + } else { + j = u64(x * (4.0 / PI)) + y = f64(j) + + // map zeros to origin + if j&1 == 1 { + j += 1 + y += 1 + } + j &= 7 // octant modulo 2Pi radians (360 degrees) + z = ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic + } + + if j > 3 { + j -= 4 + sign = !sign + } + if j > 1 { + sign = !sign + } + + zz := z * z + if j == 1 || j == 2 { + y = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5]) + } else { + y = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5]) + } + if sign { + y = -y + } + return y +} + + +@(require_results) +sin_f64 :: proc "contextless" (x: f64) -> f64 #no_bounds_check { + x := x + + switch { + case x == 0 || is_nan(x): + return x + case is_inf(x, 0): + return nan_f64() + } + + // make argument positive but save the sign + sign := false + if x < 0 { + x = -x + sign = true + } + + j: u64 + y, z: f64 + if x >= REDUCE_THRESHOLD { + j, z = _trig_reduce_f64(x) + } else { + j = u64(x * (4.0 / PI)) + y = f64(j) + + // map zeros to origin + if j&1 == 1 { + j += 1 + y += 1 + } + j &= 7 // octant modulo 2Pi radians (360 degrees) + z = ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic + } + // reflect in x axis + if j > 3 { + sign = !sign + j -= 4 + } + zz := z * z + if j == 1 || j == 2 { + y = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5]) + } else { + y = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5]) + } + if sign { + y = -y + } + return y +} + + +@(require_results) sin_f16 :: proc "c" (θ: f16) -> f16 { return f16(sin_f64(f64(θ))) } +@(require_results) cos_f16 :: proc "c" (θ: f16) -> f16 { return f16(cos_f64(f64(θ))) } +@(require_results) sin_f32 :: proc "c" (θ: f32) -> f32 { return f32(sin_f64(f64(θ))) } +@(require_results) cos_f32 :: proc "c" (θ: f32) -> f32 { return f32(cos_f64(f64(θ))) } \ No newline at end of file