From 47371791b30cc26c364edd1ec2681eff428cb894 Mon Sep 17 00:00:00 2001 From: jakubtomsu <66876057+jakubtomsu@users.noreply.github.com> Date: Fri, 10 Apr 2026 17:08:40 +0200 Subject: [PATCH 1/3] faster vector_dot and vector_cross3 --- core/math/linalg/general.odin | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/core/math/linalg/general.odin b/core/math/linalg/general.odin index ea3a4e84a..ae4acc8bb 100644 --- a/core/math/linalg/general.odin +++ b/core/math/linalg/general.odin @@ -45,12 +45,24 @@ scalar_dot :: proc "contextless" (a, b: $T) -> T where IS_FLOAT(T), !IS_ARRAY(T) } @(require_results) -vector_dot :: proc "contextless" (a, b: $T/[$N]$E) -> (c: E) where IS_NUMERIC(E) #no_bounds_check { - for i in 0.. (c: E) where IS_NUMERIC(E) #no_bounds_check { + ab := a * b + when N == 1 { + return ab.x + } else when N == 2 { + return ab.x + ab.y + } else when N == 3 { + return ab.x + ab.y + ab.z + } else when N == 4 { + return ab.x + ab.y + ab.z + ab.w + } else { + for elem in ab { + c += elem + } + return c } - return } + @(require_results) quaternion64_dot :: proc "contextless" (a, b: $T/quaternion64) -> (c: f16) { return a.w*b.w + a.x*b.x + a.y*b.y + a.z*b.z @@ -86,11 +98,8 @@ vector_cross2 :: proc "contextless" (a, b: $T/[2]$E) -> E where IS_NUMERIC(E) { } @(require_results) -vector_cross3 :: proc "contextless" (a, b: $T/[3]$E) -> (c: T) where IS_NUMERIC(E) { - c[0] = a[1]*b[2] - b[1]*a[2] - c[1] = a[2]*b[0] - b[2]*a[0] - c[2] = a[0]*b[1] - b[0]*a[1] - return +vector_cross3 :: proc "contextless" (a, b: $T/[3]$E) -> (c: T) where IS_NUMERIC(E) #no_bounds_check { + return a.yzx*b.zxy - b.yzx*a.zxy } @(require_results) @@ -130,12 +139,12 @@ normalize0 :: proc{vector_normalize0, quaternion_normalize0} @(require_results) vector_length :: proc "contextless" (v: $T/[$N]$E) -> E where IS_FLOAT(E) { - return math.sqrt(dot(v, v)) + return #force_inline math.sqrt(#force_inline dot(v, v)) } @(require_results) vector_length2 :: proc "contextless" (v: $T/[$N]$E) -> E where IS_NUMERIC(E) { - return dot(v, v) + return #force_inline dot(v, v) } @(require_results) From d5e4e61c1cfe0acd58bdbe20d782dd5c4aa65026 Mon Sep 17 00:00:00 2001 From: jakubtomsu <66876057+jakubtomsu@users.noreply.github.com> Date: Fri, 10 Apr 2026 17:10:24 +0200 Subject: [PATCH 2/3] implement floor, ceil and trunc with SIMD --- core/math/linalg/extended.odin | 67 ++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 16 deletions(-) diff --git a/core/math/linalg/extended.odin b/core/math/linalg/extended.odin index 22c37dd0e..0470054c3 100644 --- a/core/math/linalg/extended.odin +++ b/core/math/linalg/extended.odin @@ -1,6 +1,7 @@ package linalg import "base:builtin" +import "base:intrinsics" import "core:math" @(require_results) @@ -413,26 +414,12 @@ pow :: proc "contextless" (x, e: $T) -> (out: T) where IS_FLOAT(ELEM_TYPE(T)) { @(require_results) ceil :: proc "contextless" (x: $T) -> (out: T) where IS_FLOAT(ELEM_TYPE(T)) { - when IS_ARRAY(T) { - for i in 0.. (out: T) where IS_FLOAT(ELEM_TYPE(T)) { - when IS_ARRAY(T) { - for i in 0.. (out: T) where IS_FLOAT(ELEM_TYPE(T)) { return } +@(require_results) +trunc :: proc "contextless" (x: $T) -> (out: T) where IS_NUMERIC(ELEM_TYPE(T)) { + return _from_simd4(T, intrinsics.simd_trunc(_to_simd4(x))) +} + @(require_results) fract :: proc "contextless" (x: $T) -> T where IS_FLOAT(ELEM_TYPE(T)) { f := #force_inline floor(x) @@ -613,3 +605,46 @@ not :: proc "contextless" (x: $A/[$N]bool) -> (out: A) { } return } + + +@(require_results) +_to_simd4 :: #force_inline proc "contextless" (a: $T) -> (out: #simd[4]ELEM_TYPE(T)) where IS_NUMERIC(ELEM_TYPE(T)) #no_bounds_check { + when IS_ARRAY(T) { + when len(T) == 1 { + _a: [4]ELEM_TYPE(T) + _a.x = a.x + return transmute(#simd[4]ELEM_TYPE(T))_a + } else when len(T) == 2 { + _a: [4]ELEM_TYPE(T) + _a.xy = a + return transmute(#simd[4]ELEM_TYPE(T))_a + } else when len(T) == 3 { + _a: [4]ELEM_TYPE(T) + _a.xyz = a + return transmute(#simd[4]ELEM_TYPE(T))_a + } else { + return transmute(#simd[4]ELEM_TYPE(T))a + } + } else { + _a: [4]ELEM_TYPE(T) + _a.x = a + return transmute(#simd[4]ELEM_TYPE(T))_a + } +} + +@(require_results) +_from_simd4 :: #force_inline proc "contextless" ($T: typeid, a: $V/#simd[4]$E) -> T where IS_NUMERIC(ELEM_TYPE(T)) #no_bounds_check { + when IS_ARRAY(T) { + when len(T) == 1 { + return (transmute([4]ELEM_TYPE(T))a).x + } else when len(T) == 2 { + return (transmute([4]ELEM_TYPE(T))a).xy + } else when len(T) == 3 { + return (transmute([4]ELEM_TYPE(T))a).xyz + } else { + return transmute([4]ELEM_TYPE(T))a + } + } else { + return (transmute([4]ELEM_TYPE(T))a).x + } +} From e15230c1945b9342bb814891b563f50e0ffac5d6 Mon Sep 17 00:00:00 2001 From: jakubtomsu <66876057+jakubtomsu@users.noreply.github.com> Date: Fri, 10 Apr 2026 17:21:03 +0200 Subject: [PATCH 3/3] remove force_inline from vector_dot --- core/math/linalg/general.odin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/math/linalg/general.odin b/core/math/linalg/general.odin index ae4acc8bb..956fdb919 100644 --- a/core/math/linalg/general.odin +++ b/core/math/linalg/general.odin @@ -45,7 +45,7 @@ scalar_dot :: proc "contextless" (a, b: $T) -> T where IS_FLOAT(T), !IS_ARRAY(T) } @(require_results) -vector_dot :: #force_inline proc "contextless" (a, b: $T/[$N]$E) -> (c: E) where IS_NUMERIC(E) #no_bounds_check { +vector_dot :: proc "contextless" (a, b: $T/[$N]$E) -> (c: E) where IS_NUMERIC(E) #no_bounds_check { ab := a * b when N == 1 { return ab.x