Merge pull request #6549 from jakubtomsu/faster-linalg

Faster linalg `dot`, `cross3`, `floor`, `ceil` and add `trunc`
This commit is contained in:
Jeroen van Rijn
2026-04-10 19:06:54 +02:00
committed by GitHub
2 changed files with 70 additions and 26 deletions

View File

@@ -1,6 +1,7 @@
package linalg
import "base:builtin"
import "base:intrinsics"
import "core:math"
@(require_results)
@@ -413,26 +414,12 @@ pow :: proc "contextless" (x, e: $T) -> (out: T) where IS_FLOAT(ELEM_TYPE(T)) {
// ceil rounds `x` (or every element of `x`) up to the nearest integral value.
//
// Scalars and arrays of 1 to 4 elements are packed into a 4-lane SIMD vector and
// rounded with a single `intrinsics.simd_ceil`. Arrays of any other length cannot
// be transmuted to `#simd[4]` by `_to_simd4`, so they are rounded element by
// element with `math.ceil`.
@(require_results)
ceil :: proc "contextless" (x: $T) -> (out: T) where IS_FLOAT(ELEM_TYPE(T)) {
	when IS_ARRAY(T) {
		when len(T) >= 1 && len(T) <= 4 {
			out = _from_simd4(T, intrinsics.simd_ceil(_to_simd4(x)))
		} else {
			// Does not fit a single 4-lane vector: keep the per-element path.
			for i in 0..<len(T) {
				out[i] = #force_inline math.ceil(x[i])
			}
		}
	} else {
		out = _from_simd4(T, intrinsics.simd_ceil(_to_simd4(x)))
	}
	return
}
// floor rounds `x` (or every element of `x`) down to the nearest integral value.
//
// Scalars and arrays of 1 to 4 elements are packed into a 4-lane SIMD vector and
// rounded with a single `intrinsics.simd_floor`. Arrays of any other length cannot
// be transmuted to `#simd[4]` by `_to_simd4`, so they are rounded element by
// element with `math.floor`.
@(require_results)
floor :: proc "contextless" (x: $T) -> (out: T) where IS_FLOAT(ELEM_TYPE(T)) {
	when IS_ARRAY(T) {
		when len(T) >= 1 && len(T) <= 4 {
			out = _from_simd4(T, intrinsics.simd_floor(_to_simd4(x)))
		} else {
			// Does not fit a single 4-lane vector: keep the per-element path.
			for i in 0..<len(T) {
				out[i] = #force_inline math.floor(x[i])
			}
		}
	} else {
		out = _from_simd4(T, intrinsics.simd_floor(_to_simd4(x)))
	}
	return
}
@(require_results)
@@ -447,6 +434,11 @@ round :: proc "contextless" (x: $T) -> (out: T) where IS_FLOAT(ELEM_TYPE(T)) {
return
}
// trunc rounds `x` (or every element of `x`) toward zero, discarding the
// fractional part.
//
// Only floating-point element types are accepted: `intrinsics.simd_trunc` is
// defined for float vectors, and this matches the constraint used by `ceil` and
// `floor`. Scalars and arrays of 1 to 4 elements take the SIMD path; arrays of any
// other length are truncated element by element with `math.trunc`.
@(require_results)
trunc :: proc "contextless" (x: $T) -> (out: T) where IS_FLOAT(ELEM_TYPE(T)) {
	when IS_ARRAY(T) {
		when len(T) >= 1 && len(T) <= 4 {
			out = _from_simd4(T, intrinsics.simd_trunc(_to_simd4(x)))
		} else {
			// Does not fit a single 4-lane vector: fall back to the scalar routine.
			for i in 0..<len(T) {
				out[i] = #force_inline math.trunc(x[i])
			}
		}
	} else {
		out = _from_simd4(T, intrinsics.simd_trunc(_to_simd4(x)))
	}
	return
}
@(require_results)
fract :: proc "contextless" (x: $T) -> T where IS_FLOAT(ELEM_TYPE(T)) {
f := #force_inline floor(x)
@@ -613,3 +605,46 @@ not :: proc "contextless" (x: $A/[$N]bool) -> (out: A) {
}
return
}
// _to_simd4 packs a scalar, or a fixed array, into a 4-lane SIMD vector.
//
// Scalars and arrays of 1, 2 or 3 elements are copied into the leading lanes of a
// zero-initialised `[4]ELEM_TYPE(T)`, which is then reinterpreted as
// `#simd[4]ELEM_TYPE(T)`; the unused trailing lanes therefore stay zero.
// Every other array is reinterpreted directly, which only type-checks when it has
// exactly 4 elements (a `transmute` requires equal sizes).
@(require_results)
_to_simd4 :: #force_inline proc "contextless" (a: $T) -> (out: #simd[4]ELEM_TYPE(T)) where IS_NUMERIC(ELEM_TYPE(T)) #no_bounds_check {
	Lanes  :: [4]ELEM_TYPE(T)
	Vector :: #simd[4]ELEM_TYPE(T)

	when !IS_ARRAY(T) {
		// Scalar: lane 0 carries the value.
		padded: Lanes
		padded.x = a
		return transmute(Vector)padded
	} else when len(T) == 1 {
		padded: Lanes
		padded.x = a.x
		return transmute(Vector)padded
	} else when len(T) == 2 {
		padded: Lanes
		padded.xy = a
		return transmute(Vector)padded
	} else when len(T) == 3 {
		padded: Lanes
		padded.xyz = a
		return transmute(Vector)padded
	} else {
		// Already vector-sized: no padding copy needed.
		return transmute(Vector)a
	}
}
// _from_simd4 unpacks a 4-lane SIMD vector back into a value of type `T`.
//
// The vector is reinterpreted as `[4]ELEM_TYPE(T)` and the leading lanes are
// returned: lane 0 for scalars and 1-element arrays, `.xy` for 2 elements,
// `.xyz` for 3 elements, and all four lanes for every other array type.
@(require_results)
_from_simd4 :: #force_inline proc "contextless" ($T: typeid, a: $V/#simd[4]$E) -> T where IS_NUMERIC(ELEM_TYPE(T)) #no_bounds_check {
	lanes := transmute([4]ELEM_TYPE(T))a

	when !IS_ARRAY(T) {
		return lanes.x
	} else when len(T) == 1 {
		return lanes.x
	} else when len(T) == 2 {
		return lanes.xy
	} else when len(T) == 3 {
		return lanes.xyz
	} else {
		return lanes
	}
}

View File

@@ -46,11 +46,23 @@ scalar_dot :: proc "contextless" (a, b: $T) -> T where IS_FLOAT(T), !IS_ARRAY(T)
// vector_dot returns the dot product of `a` and `b`: the sum of their
// element-wise products.
//
// The product `a * b` is computed once as an array operation. For 1 to 4
// elements the lanes are summed with an explicit expression; any other length
// accumulates the products in a loop into the zero-initialised result `c`.
@(require_results)
vector_dot :: proc "contextless" (a, b: $T/[$N]$E) -> (c: E) where IS_NUMERIC(E) #no_bounds_check {
	ab := a * b
	when N == 1 {
		return ab.x
	} else when N == 2 {
		return ab.x + ab.y
	} else when N == 3 {
		return ab.x + ab.y + ab.z
	} else when N == 4 {
		return ab.x + ab.y + ab.z + ab.w
	} else {
		for elem in ab {
			c += elem
		}
		return c
	}
	return
}
@(require_results)
quaternion64_dot :: proc "contextless" (a, b: $T/quaternion64) -> (c: f16) {
return a.w*b.w + a.x*b.x + a.y*b.y + a.z*b.z
@@ -86,11 +98,8 @@ vector_cross2 :: proc "contextless" (a, b: $T/[2]$E) -> E where IS_NUMERIC(E) {
}
// vector_cross3 returns the cross product of the 3-element vectors `a` and `b`.
//
// Written with swizzles so the whole product is two array multiplies and one
// subtract; lane by lane this is:
//   c.x = a.y*b.z - b.y*a.z
//   c.y = a.z*b.x - b.z*a.x
//   c.z = a.x*b.y - b.x*a.y
@(require_results)
vector_cross3 :: proc "contextless" (a, b: $T/[3]$E) -> (c: T) where IS_NUMERIC(E) #no_bounds_check {
	return a.yzx*b.zxy - b.yzx*a.zxy
}
@(require_results)
@@ -130,12 +139,12 @@ normalize0 :: proc{vector_normalize0, quaternion_normalize0}
// vector_length returns the length of `v`, computed as `sqrt(dot(v, v))`.
// Both calls are force-inlined.
@(require_results)
vector_length :: proc "contextless" (v: $T/[$N]$E) -> E where IS_FLOAT(E) {
	return #force_inline math.sqrt(#force_inline dot(v, v))
}
// vector_length2 returns the squared length of `v`, i.e. `dot(v, v)`, without
// taking a square root. The `dot` call is force-inlined.
@(require_results)
vector_length2 :: proc "contextless" (v: $T/[$N]$E) -> E where IS_NUMERIC(E) {
	return #force_inline dot(v, v)
}
@(require_results)