mirror of
https://github.com/odin-lang/Odin.git
synced 2026-05-27 14:15:09 +00:00
Merge pull request #6549 from jakubtomsu/faster-linalg
Faster linalg `dot`, `cross3`, `floor`, `ceil` and add `trunc`
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package linalg
|
||||
|
||||
import "base:builtin"
|
||||
import "base:intrinsics"
|
||||
import "core:math"
|
||||
|
||||
@(require_results)
|
||||
@@ -413,26 +414,12 @@ pow :: proc "contextless" (x, e: $T) -> (out: T) where IS_FLOAT(ELEM_TYPE(T)) {
|
||||
|
||||
// ceil rounds every component of `x` up to the nearest integral value.
//
// `x` may be a float scalar or an array of floats; the result has the same type.
// Scalars and arrays with 1 to 4 elements are widened to a 4-lane SIMD vector and
// rounded with a single `intrinsics.simd_ceil`. Arrays of any other length cannot
// be packed by `_to_simd4`, so they are rounded element by element with `math.ceil`.
@(require_results)
ceil :: proc "contextless" (x: $T) -> (out: T) where IS_FLOAT(ELEM_TYPE(T)) {
	when IS_ARRAY(T) {
		when len(T) >= 1 && len(T) <= 4 {
			// Fits in one 4-lane vector: pad, round all lanes at once, unpack.
			out = _from_simd4(T, intrinsics.simd_ceil(_to_simd4(x)))
		} else {
			// Empty or wider-than-4 arrays: keep the generic per-element path.
			for i in 0..<len(T) {
				out[i] = #force_inline math.ceil(x[i])
			}
		}
	} else {
		// Scalar input travels through lane 0 of the SIMD vector.
		out = _from_simd4(T, intrinsics.simd_ceil(_to_simd4(x)))
	}
	return
}
|
||||
|
||||
// floor rounds every component of `x` down to the nearest integral value.
//
// `x` may be a float scalar or an array of floats; the result has the same type.
// Scalars and arrays with 1 to 4 elements are widened to a 4-lane SIMD vector and
// rounded with a single `intrinsics.simd_floor`. Arrays of any other length cannot
// be packed by `_to_simd4`, so they are rounded element by element with `math.floor`.
@(require_results)
floor :: proc "contextless" (x: $T) -> (out: T) where IS_FLOAT(ELEM_TYPE(T)) {
	when IS_ARRAY(T) {
		when len(T) >= 1 && len(T) <= 4 {
			// Fits in one 4-lane vector: pad, round all lanes at once, unpack.
			out = _from_simd4(T, intrinsics.simd_floor(_to_simd4(x)))
		} else {
			// Empty or wider-than-4 arrays: keep the generic per-element path.
			for i in 0..<len(T) {
				out[i] = #force_inline math.floor(x[i])
			}
		}
	} else {
		// Scalar input travels through lane 0 of the SIMD vector.
		out = _from_simd4(T, intrinsics.simd_floor(_to_simd4(x)))
	}
	return
}
|
||||
|
||||
@(require_results)
|
||||
@@ -447,6 +434,11 @@ round :: proc "contextless" (x: $T) -> (out: T) where IS_FLOAT(ELEM_TYPE(T)) {
|
||||
return
|
||||
}
|
||||
|
||||
// trunc discards the fractional part of every component of `x` (rounds toward zero).
//
// `x` may be a float scalar or an array of floats; the result has the same type.
// The element type is constrained to floats, matching `ceil` and `floor`:
// `intrinsics.simd_trunc` only accepts float lanes, so a wider constraint would
// admit calls that cannot compile.
// Scalars and arrays with 1 to 4 elements use one 4-lane SIMD truncation; arrays of
// any other length are truncated element by element with `math.trunc`.
@(require_results)
trunc :: proc "contextless" (x: $T) -> (out: T) where IS_FLOAT(ELEM_TYPE(T)) {
	when IS_ARRAY(T) {
		when len(T) >= 1 && len(T) <= 4 {
			// Fits in one 4-lane vector: pad, truncate all lanes at once, unpack.
			out = _from_simd4(T, intrinsics.simd_trunc(_to_simd4(x)))
		} else {
			// Empty or wider-than-4 arrays cannot be packed by _to_simd4.
			for i in 0..<len(T) {
				out[i] = #force_inline math.trunc(x[i])
			}
		}
	} else {
		// Scalar input travels through lane 0 of the SIMD vector.
		out = _from_simd4(T, intrinsics.simd_trunc(_to_simd4(x)))
	}
	return
}
|
||||
|
||||
@(require_results)
|
||||
fract :: proc "contextless" (x: $T) -> T where IS_FLOAT(ELEM_TYPE(T)) {
|
||||
f := #force_inline floor(x)
|
||||
@@ -613,3 +605,46 @@ not :: proc "contextless" (x: $A/[$N]bool) -> (out: A) {
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// _to_simd4 packs a scalar or a short array into a 4-lane SIMD vector.
//
// A scalar is stored in lane 0. Arrays of length 1, 2 or 3 fill the leading lanes.
// In all of those cases the remaining lanes are zero, because the staging array
// starts zero-initialised. Any other array is reinterpreted directly via
// `transmute`, which is only well-formed when it is exactly four elements long.
@(require_results)
_to_simd4 :: #force_inline proc "contextless" (a: $T) -> (out: #simd[4]ELEM_TYPE(T)) where IS_NUMERIC(ELEM_TYPE(T)) #no_bounds_check {
	// Zero-initialised staging buffer; unused lanes stay 0.
	padded: [4]ELEM_TYPE(T)
	when !IS_ARRAY(T) {
		padded.x = a
	} else when len(T) == 1 {
		padded.x = a.x
	} else when len(T) == 2 {
		padded.xy = a
	} else when len(T) == 3 {
		padded.xyz = a
	} else {
		// Already vector-sized: reinterpret the bits without copying through `padded`.
		return transmute(#simd[4]ELEM_TYPE(T))a
	}
	return transmute(#simd[4]ELEM_TYPE(T))padded
}
|
||||
|
||||
// _from_simd4 unpacks a 4-lane SIMD vector back into a value of type `T`.
//
// It is the counterpart of `_to_simd4`: a scalar `T` (or a 1-element array) takes
// lane 0, 2- and 3-element arrays take the leading lanes, and any other array type
// receives all four lanes.
@(require_results)
_from_simd4 :: #force_inline proc "contextless" ($T: typeid, a: $V/#simd[4]$E) -> T where IS_NUMERIC(ELEM_TYPE(T)) #no_bounds_check {
	// View the vector as a plain array once, then pick the lanes `T` needs.
	lanes := transmute([4]ELEM_TYPE(T))a
	when !IS_ARRAY(T) {
		return lanes.x
	} else when len(T) == 1 {
		return lanes.x
	} else when len(T) == 2 {
		return lanes.xy
	} else when len(T) == 3 {
		return lanes.xyz
	} else {
		return lanes
	}
}
|
||||
|
||||
@@ -46,11 +46,23 @@ scalar_dot :: proc "contextless" (a, b: $T) -> T where IS_FLOAT(T), !IS_ARRAY(T)
|
||||
|
||||
// vector_dot returns the dot product of two equally sized numeric vectors:
// the sum of the component-wise products `a[i] * b[i]`.
//
// The products are computed with one array multiplication. For N from 1 to 4 the
// sum is written out explicitly in index order; for any other N (including 0) the
// products are accumulated in a loop starting from the zero value of `E`.
@(require_results)
vector_dot :: proc "contextless" (a, b: $T/[$N]$E) -> (c: E) where IS_NUMERIC(E) #no_bounds_check {
	// Component-wise product of the two vectors.
	ab := a * b
	when N == 1 {
		c = ab.x
	} else when N == 2 {
		c = ab.x + ab.y
	} else when N == 3 {
		c = ab.x + ab.y + ab.z
	} else when N == 4 {
		c = ab.x + ab.y + ab.z + ab.w
	} else {
		// Generic width: `c` starts at zero and gathers every product.
		for elem in ab {
			c += elem
		}
	}
	return
}
|
||||
|
||||
@(require_results)
|
||||
quaternion64_dot :: proc "contextless" (a, b: $T/quaternion64) -> (c: f16) {
|
||||
return a.w*b.w + a.x*b.x + a.y*b.y + a.z*b.z
|
||||
@@ -86,11 +98,8 @@ vector_cross2 :: proc "contextless" (a, b: $T/[2]$E) -> E where IS_NUMERIC(E) {
|
||||
}
|
||||
|
||||
// vector_cross3 returns the cross product of two 3-component numeric vectors.
//
// Written with swizzles so the whole result is one array expression:
//   c.x = a.y*b.z - b.y*a.z
//   c.y = a.z*b.x - b.z*a.x
//   c.z = a.x*b.y - b.x*a.y
@(require_results)
vector_cross3 :: proc "contextless" (a, b: $T/[3]$E) -> (c: T) where IS_NUMERIC(E) #no_bounds_check {
	return a.yzx*b.zxy - b.yzx*a.zxy
}
|
||||
|
||||
@(require_results)
|
||||
@@ -130,12 +139,12 @@ normalize0 :: proc{vector_normalize0, quaternion_normalize0}
|
||||
|
||||
// vector_length returns the Euclidean length of a float vector `v`,
// computed as the square root of `dot(v, v)`.
// Both calls are force-inlined.
@(require_results)
vector_length :: proc "contextless" (v: $T/[$N]$E) -> E where IS_FLOAT(E) {
	return #force_inline math.sqrt(#force_inline dot(v, v))
}
|
||||
|
||||
// vector_length2 returns the squared length of a numeric vector `v`,
// i.e. `dot(v, v)`, without taking a square root.
// The `dot` call is force-inlined.
@(require_results)
vector_length2 :: proc "contextless" (v: $T/[$N]$E) -> E where IS_NUMERIC(E) {
	return #force_inline dot(v, v)
}
|
||||
|
||||
@(require_results)
|
||||
|
||||
Reference in New Issue
Block a user