NEON Quaternion Multiply

2026-01-08 04:13:10 +00:00 · 2023-12-12 01:46:56 +00:00
parent fcc510f767
commit 623215b228
1 changed files with 13 additions and 4 deletions
--- a/HandmadeMath.h
+++ b/HandmadeMath.h
@@ -2176,11 +2176,20 @@ static inline HMM_Quat HMM_MulQ(HMM_Quat Left, HMM_Quat Right)
    SSEResultOne = _mm_shuffle_ps(Left.SSE, Left.SSE, _MM_SHUFFLE(3, 3, 3, 3));
    SSEResultTwo = _mm_shuffle_ps(Right.SSE, Right.SSE, _MM_SHUFFLE(3, 2, 1, 0));
    Result.SSE = _mm_add_ps(SSEResultThree, _mm_mul_ps(SSEResultTwo, SSEResultOne));
+#elif HANDMADE_MATH__USE_NEON
+    float32x4_t Right1032 = vrev64q_f32(Right.NEON);
+    float32x4_t Right3210 = vcombine_f32(vget_high_f32(Right1032), vget_low_f32(Right1032));
+    float32x4_t Right2301 = vrev64q_f32(Right3210);
+    
+    float32x4_t FirstSign = {1.0f, -1.0f, 1.0f, -1.0f};
+    Result.NEON = vmulq_f32(Right3210, vmulq_f32(vdupq_laneq_f32(Left.NEON, 0), FirstSign));
+    float32x4_t SecondSign = {1.0f, 1.0f, -1.0f, -1.0f};
+    Result.NEON = vfmaq_f32(Result.NEON, Right2301, vmulq_f32(vdupq_laneq_f32(Left.NEON, 1), SecondSign));
+    float32x4_t ThirdSign = {-1.0f, 1.0f, 1.0f, -1.0f};
+    Result.NEON = vfmaq_f32(Result.NEON, Right1032, vmulq_f32(vdupq_laneq_f32(Left.NEON, 2), ThirdSign));
+    Result.NEON = vfmaq_laneq_f32(Result.NEON, Right.NEON, Left.NEON, 3);
+    
 #else
-# if HANDMADE_MATH__USE_NEON
-    // TOOD (jack): go look up how quaternion multiplication works and implment it in neon, we don't have xor.
-#  warning "Vectorized Quaternion Multiply not yet implemented in NEON"
-# endif 
    Result.X =  Right.Elements[3] * +Left.Elements[0];
    Result.Y =  Right.Elements[2] * -Left.Elements[0];
    Result.Z =  Right.Elements[1] * +Left.Elements[0];