From 45c91702a910f7b8df0ac90f84667f6d94ffb9d3 Mon Sep 17 00:00:00 2001 From: Ben Visness Date: Mon, 11 Mar 2019 13:12:48 -0500 Subject: [PATCH] Added SSE support for Quaternion operations (#97) (#98) * Added SSE support for Quaternion operations (#97) * Added SSE support for Quaternion operations O2 | Function | SSE | NO SSE | ==================================================== | Inverse | 163 (0.89s) | 165 (1.89s) | | NLerp | 330 (1.70s) | 330 (1.75s) | | Normalize | 169 (1.03s) | 169 (1.06s) | | Dot | 22 (1.15s) | 23 (1.14s) | | DivF | 23 (0.72s) | 23 (0.82s) | | MulF | 22 (0.75s) | 22 (0.79s) | | Mul | 24 (1.14s) | 23 (1.24s) | | Sub | 23 (1.17s) | 37 (1.20s) | | Add | 23 (1.20s) | 24 (1.19s) | O0 | Function | SSE | NO SSE | ==================================================== | Inverse | 394 (1.62s) | 430 (3.05s) | | NLerp | 694 (2.71s) | 1035(4.81s) | | Normalize | 374 (1.58s) | 412 (2.95s) | | Dot | 81 (1.83s) | 23 (2.50s) | | DivF | 61 (1.12s) | 25 (2.37s) | | MulF | 58 (1.09s) | 23 (2.31s) | | Mul | 94 (1.97s) | 42 (2.88s) | | Sub | 75 (1.83s) | 23 (2.82s) | | Add | 75 (1.81s) | 23 (2.81s) | * Fixed quaternion multiplication Old quaternion multiplication had a bug, this is a different approach. * Added release notes and version for 1.9.0 --- HandmadeMath.h | 80 +++++++++++++++++++++++++++++++++++++++++++++----- README.md | 1 + 2 files changed, 74 insertions(+), 7 deletions(-) diff --git a/HandmadeMath.h b/HandmadeMath.h index 2b4d59c..d274351 100644 --- a/HandmadeMath.h +++ b/HandmadeMath.h @@ -1,5 +1,5 @@ /* - HandmadeMath.h v1.8.0 + HandmadeMath.h v1.9.0 This is a single header file with a bunch of useful functions for game and graphics math operations. @@ -83,6 +83,7 @@ Gingerbill (@TheGingerBill) Ben Visness (@bvisness) Trinton Bullard (@Peliex_Dev) + @AntonDan Fixes: Jeroen van Rijn (@J_vanRijn) @@ -379,6 +380,10 @@ typedef union hmm_quaternion }; float Elements[4]; + +#ifdef HANDMADE_MATH__USE_SSE + __m128 InternalElementsSSE; +#endif } hmm_quaternion; typedef int32_t hmm_bool; @@ -1228,10 +1233,14 @@ HMM_INLINE hmm_quaternion HMM_Quaternion(float X, float Y, float Z, float W) { hmm_quaternion Result; +#ifdef HANDMADE_MATH__USE_SSE + Result.InternalElementsSSE = _mm_setr_ps(X, Y, Z, W); +#else Result.X = X; Result.Y = Y; Result.Z = Z; Result.W = W; +#endif return (Result); } @@ -1240,10 +1249,14 @@ HMM_INLINE hmm_quaternion HMM_QuaternionV4(hmm_vec4 Vector) { hmm_quaternion Result; +#ifdef HANDMADE_MATH__USE_SSE + Result.InternalElementsSSE = Vector.InternalElementsSSE; +#else Result.X = Vector.X; Result.Y = Vector.Y; Result.Z = Vector.Z; Result.W = Vector.W; +#endif return (Result); } @@ -1252,10 +1265,15 @@ HMM_INLINE hmm_quaternion HMM_AddQuaternion(hmm_quaternion Left, hmm_quaternion { hmm_quaternion Result; +#ifdef HANDMADE_MATH__USE_SSE + Result.InternalElementsSSE = _mm_add_ps(Left.InternalElementsSSE, Right.InternalElementsSSE); +#else + Result.X = Left.X + Right.X; Result.Y = Left.Y + Right.Y; Result.Z = Left.Z + Right.Z; Result.W = Left.W + Right.W; +#endif return (Result); } @@ -1264,10 +1282,15 @@ HMM_INLINE hmm_quaternion HMM_SubtractQuaternion(hmm_quaternion Left, hmm_quater { hmm_quaternion Result; +#ifdef HANDMADE_MATH__USE_SSE + Result.InternalElementsSSE = _mm_sub_ps(Left.InternalElementsSSE, Right.InternalElementsSSE); +#else + Result.X = Left.X - Right.X; Result.Y = Left.Y - Right.Y; Result.Z = Left.Z - Right.Z; Result.W = Left.W - Right.W; +#endif return (Result); } @@ -1276,10 +1299,28 @@ HMM_INLINE hmm_quaternion HMM_MultiplyQuaternion(hmm_quaternion Left, hmm_quater { hmm_quaternion Result; +#ifdef HANDMADE_MATH__USE_SSE + __m128 SSEResultOne = _mm_xor_ps(_mm_shuffle_ps(Left.InternalElementsSSE, Left.InternalElementsSSE, _MM_SHUFFLE(0, 0, 0, 0)), _mm_setr_ps(0.f, -0.f, 0.f, -0.f)); + __m128 SSEResultTwo = _mm_shuffle_ps(Right.InternalElementsSSE, Right.InternalElementsSSE, _MM_SHUFFLE(0, 1, 2, 3)); + __m128 SSEResultThree = _mm_mul_ps(SSEResultTwo, SSEResultOne); + + SSEResultOne = _mm_xor_ps(_mm_shuffle_ps(Left.InternalElementsSSE, Left.InternalElementsSSE, _MM_SHUFFLE(1, 1, 1, 1)) , _mm_setr_ps(0.f, 0.f, -0.f, -0.f)); + SSEResultTwo = _mm_shuffle_ps(Right.InternalElementsSSE, Right.InternalElementsSSE, _MM_SHUFFLE(1, 0, 3, 2)); + SSEResultThree = _mm_add_ps(SSEResultThree, _mm_mul_ps(SSEResultTwo, SSEResultOne)); + + SSEResultOne = _mm_xor_ps(_mm_shuffle_ps(Left.InternalElementsSSE, Left.InternalElementsSSE, _MM_SHUFFLE(2, 2, 2, 2)), _mm_setr_ps(-0.f, 0.f, 0.f, -0.f)); + SSEResultTwo = _mm_shuffle_ps(Right.InternalElementsSSE, Right.InternalElementsSSE, _MM_SHUFFLE(2, 3, 0, 1)); + SSEResultThree = _mm_add_ps(SSEResultThree, _mm_mul_ps(SSEResultTwo, SSEResultOne)); + + SSEResultOne = _mm_shuffle_ps(Left.InternalElementsSSE, Left.InternalElementsSSE, _MM_SHUFFLE(3, 3, 3, 3)); + SSEResultTwo = _mm_shuffle_ps(Right.InternalElementsSSE, Right.InternalElementsSSE, _MM_SHUFFLE(3, 2, 1, 0)); + Result.InternalElementsSSE = _mm_add_ps(SSEResultThree, _mm_mul_ps(SSEResultTwo, SSEResultOne)); +#else Result.X = (Left.X * Right.W) + (Left.Y * Right.Z) - (Left.Z * Right.Y) + (Left.W * Right.X); Result.Y = (-Left.X * Right.Z) + (Left.Y * Right.W) + (Left.Z * Right.X) + (Left.W * Right.Y); Result.Z = (Left.X * Right.Y) - (Left.Y * Right.X) + (Left.Z * Right.W) + (Left.W * Right.Z); Result.W = (-Left.X * Right.X) - (Left.Y * Right.Y) - (Left.Z * Right.Z) + (Left.W * Right.W); +#endif return (Result); } @@ -1288,10 +1329,15 @@ HMM_INLINE hmm_quaternion HMM_MultiplyQuaternionF(hmm_quaternion Left, float Mul { hmm_quaternion Result; +#ifdef HANDMADE_MATH__USE_SSE + __m128 Scalar = _mm_set1_ps(Multiplicative); + Result.InternalElementsSSE = _mm_mul_ps(Left.InternalElementsSSE, Scalar); +#else Result.X = Left.X * Multiplicative; Result.Y = Left.Y * Multiplicative; Result.Z = Left.Z * Multiplicative; Result.W = Left.W * Multiplicative; +#endif return (Result); } @@ -1300,10 +1346,15 @@ HMM_INLINE hmm_quaternion HMM_DivideQuaternionF(hmm_quaternion Left, float Divid { hmm_quaternion Result; +#ifdef HANDMADE_MATH__USE_SSE + __m128 Scalar = _mm_set1_ps(Dividend); + Result.InternalElementsSSE = _mm_div_ps(Left.InternalElementsSSE, Scalar); +#else Result.X = Left.X / Dividend; Result.Y = Left.Y / Dividend; Result.Z = Left.Z / Dividend; Result.W = Left.W / Dividend; +#endif return (Result); } @@ -1312,7 +1363,18 @@ HMM_EXTERN hmm_quaternion HMM_InverseQuaternion(hmm_quaternion Left); HMM_INLINE float HMM_DotQuaternion(hmm_quaternion Left, hmm_quaternion Right) { - float Result = (Left.X * Right.X) + (Left.Y * Right.Y) + (Left.Z * Right.Z) + (Left.W * Right.W); + float Result; + +#ifdef HANDMADE_MATH__USE_SSE + __m128 SSEResultOne = _mm_mul_ps(Left.InternalElementsSSE, Right.InternalElementsSSE); + __m128 SSEResultTwo = _mm_shuffle_ps(SSEResultOne, SSEResultOne, _MM_SHUFFLE(2, 3, 0, 1)); + SSEResultOne = _mm_add_ps(SSEResultOne, SSEResultTwo); + SSEResultTwo = _mm_shuffle_ps(SSEResultOne, SSEResultOne, _MM_SHUFFLE(0, 1, 2, 3)); + SSEResultOne = _mm_add_ps(SSEResultOne, SSEResultTwo); + _mm_store_ss(&Result, SSEResultOne); +#else + Result = (Left.X * Right.X) + (Left.Y * Right.Y) + (Left.Z * Right.Z) + (Left.W * Right.W); +#endif return (Result); } @@ -1331,11 +1393,18 @@ HMM_INLINE hmm_quaternion HMM_NLerp(hmm_quaternion Left, float Time, hmm_quatern { hmm_quaternion Result; +#ifdef HANDMADE_MATH__USE_SSE + __m128 ScalarLeft = _mm_set1_ps(1.0f - Time); + __m128 ScalarRight = _mm_set1_ps(Time); + __m128 SSEResultOne = _mm_mul_ps(Left.InternalElementsSSE, ScalarLeft); + __m128 SSEResultTwo = _mm_mul_ps(Right.InternalElementsSSE, ScalarRight); + Result.InternalElementsSSE = _mm_add_ps(SSEResultOne, SSEResultTwo); +#else Result.X = HMM_Lerp(Left.X, Time, Right.X); Result.Y = HMM_Lerp(Left.Y, Time, Right.Y); Result.Z = HMM_Lerp(Left.Z, Time, Right.Z); Result.W = HMM_Lerp(Left.W, Time, Right.W); - +#endif Result = HMM_NormalizeQuaternion(Result); return (Result); @@ -2343,10 +2412,7 @@ hmm_quaternion HMM_InverseQuaternion(hmm_quaternion Left) Norm = HMM_SquareRootF(HMM_DotQuaternion(Left, Left)); NormSquared = Norm * Norm; - Result.X = Conjugate.X / NormSquared; - Result.Y = Conjugate.Y / NormSquared; - Result.Z = Conjugate.Z / NormSquared; - Result.W = Conjugate.W / NormSquared; + Result = HMM_DivideQuaternionF(Conjugate, NormSquared); return (Result); } diff --git a/README.md b/README.md index b9dbc52..15aa3ae 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ To get started, go download [the latest release](https://github.com/HandmadeMath Version | Changes | ----------------|----------------| +**1.9.0** | Added SSE versions of quaternion operations. | **1.8.0** | Added fast vector normalization routines that use fast inverse square roots. **1.7.1** | Changed operator[] to take a const ref int instead of an int. **1.7.0** | Renamed the 'Rows' member of hmm_mat4 to 'Columns'. Since our matrices are column-major, this should have been named 'Columns' from the start. 'Rows' is still present, but has been deprecated.