mirror of
https://github.com/HandmadeMath/HandmadeMath.git
synced 2025-09-05 17:58:14 +00:00
180
HandmadeMath.h
180
HandmadeMath.h
@@ -105,10 +105,15 @@
|
||||
# define ASSERT_COVERED(a)
|
||||
#endif
|
||||
|
||||
#ifdef HANDMADE_MATH_NO_SSE
|
||||
# warning "HANDMADE_MATH_NO_SSE is deprecated, use HANDMADE_MATH_NO_SIMD instead"
|
||||
# define HANDMADE_MATH_NO_SIMD
|
||||
#endif
|
||||
|
||||
/* let's figure out if SSE is really available (unless disabled anyway)
|
||||
(it isn't on non-x86/x86_64 platforms or even x86 without explicit SSE support)
|
||||
=> only use "#ifdef HANDMADE_MATH__USE_SSE" to check for SSE support below this block! */
|
||||
#ifndef HANDMADE_MATH_NO_SSE
|
||||
#ifndef HANDMADE_MATH_NO_SIMD
|
||||
# ifdef _MSC_VER /* MSVC supports SSE in amd64 mode or _M_IX86_FP >= 1 (2 means SSE2) */
|
||||
# if defined(_M_AMD64) || ( defined(_M_IX86_FP) && _M_IX86_FP >= 1 )
|
||||
# define HANDMADE_MATH__USE_SSE 1
|
||||
@@ -118,7 +123,10 @@
|
||||
# define HANDMADE_MATH__USE_SSE 1
|
||||
# endif /* __SSE__ */
|
||||
# endif /* not _MSC_VER */
|
||||
#endif /* #ifndef HANDMADE_MATH_NO_SSE */
|
||||
# ifdef __ARM_NEON
|
||||
# define HANDMADE_MATH__USE_NEON 1
|
||||
# endif /* NEON Supported */
|
||||
#endif /* #ifndef HANDMADE_MATH_NO_SIMD */
|
||||
|
||||
#if (!defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
|
||||
# define HANDMADE_MATH__USE_C11_GENERICS 1
|
||||
@@ -128,6 +136,10 @@
|
||||
# include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_NEON
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable:4201)
|
||||
#endif
|
||||
@@ -354,6 +366,10 @@ typedef union HMM_Vec4
|
||||
__m128 SSE;
|
||||
#endif
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_NEON
|
||||
float32x4_t NEON;
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
inline float &operator[](int Index) { return Elements[Index]; }
|
||||
inline const float &operator[](int Index) const { return Elements[Index]; }
|
||||
@@ -414,6 +430,9 @@ typedef union HMM_Quat
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
__m128 SSE;
|
||||
#endif
|
||||
#ifdef HANDMADE_MATH__USE_NEON
|
||||
float32x4_t NEON;
|
||||
#endif
|
||||
} HMM_Quat;
|
||||
|
||||
typedef signed int HMM_Bool;
|
||||
@@ -503,6 +522,10 @@ static inline float HMM_SqrtF(float Float)
|
||||
__m128 In = _mm_set_ss(Float);
|
||||
__m128 Out = _mm_sqrt_ss(In);
|
||||
Result = _mm_cvtss_f32(Out);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
float32x4_t In = vdupq_n_f32(Float);
|
||||
float32x4_t Out = vsqrtq_f32(In);
|
||||
Result = vgetq_lane_f32(Out, 0);
|
||||
#else
|
||||
Result = HMM_SQRTF(Float);
|
||||
#endif
|
||||
@@ -593,6 +616,9 @@ static inline HMM_Vec4 HMM_V4(float X, float Y, float Z, float W)
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
Result.SSE = _mm_setr_ps(X, Y, Z, W);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
float32x4_t v = {X, Y, Z, W};
|
||||
Result.NEON = v;
|
||||
#else
|
||||
Result.X = X;
|
||||
Result.Y = Y;
|
||||
@@ -612,6 +638,9 @@ static inline HMM_Vec4 HMM_V4V(HMM_Vec3 Vector, float W)
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
Result.SSE = _mm_setr_ps(Vector.X, Vector.Y, Vector.Z, W);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
float32x4_t v = {Vector.X, Vector.Y, Vector.Z, W};
|
||||
Result.NEON = v;
|
||||
#else
|
||||
Result.XYZ = Vector;
|
||||
Result.W = W;
|
||||
@@ -659,6 +688,8 @@ static inline HMM_Vec4 HMM_AddV4(HMM_Vec4 Left, HMM_Vec4 Right)
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
Result.SSE = _mm_add_ps(Left.SSE, Right.SSE);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
Result.NEON = vaddq_f32(Left.NEON, Right.NEON);
|
||||
#else
|
||||
Result.X = Left.X + Right.X;
|
||||
Result.Y = Left.Y + Right.Y;
|
||||
@@ -703,6 +734,8 @@ static inline HMM_Vec4 HMM_SubV4(HMM_Vec4 Left, HMM_Vec4 Right)
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
Result.SSE = _mm_sub_ps(Left.SSE, Right.SSE);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
Result.NEON = vsubq_f32(Left.NEON, Right.NEON);
|
||||
#else
|
||||
Result.X = Left.X - Right.X;
|
||||
Result.Y = Left.Y - Right.Y;
|
||||
@@ -772,6 +805,8 @@ static inline HMM_Vec4 HMM_MulV4(HMM_Vec4 Left, HMM_Vec4 Right)
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
Result.SSE = _mm_mul_ps(Left.SSE, Right.SSE);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
Result.NEON = vmulq_f32(Left.NEON, Right.NEON);
|
||||
#else
|
||||
Result.X = Left.X * Right.X;
|
||||
Result.Y = Left.Y * Right.Y;
|
||||
@@ -792,6 +827,8 @@ static inline HMM_Vec4 HMM_MulV4F(HMM_Vec4 Left, float Right)
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
__m128 Scalar = _mm_set1_ps(Right);
|
||||
Result.SSE = _mm_mul_ps(Left.SSE, Scalar);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
Result.NEON = vmulq_n_f32(Left.NEON, Right);
|
||||
#else
|
||||
Result.X = Left.X * Right;
|
||||
Result.Y = Left.Y * Right;
|
||||
@@ -861,6 +898,8 @@ static inline HMM_Vec4 HMM_DivV4(HMM_Vec4 Left, HMM_Vec4 Right)
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
Result.SSE = _mm_div_ps(Left.SSE, Right.SSE);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
Result.NEON = vdivq_f32(Left.NEON, Right.NEON);
|
||||
#else
|
||||
Result.X = Left.X / Right.X;
|
||||
Result.Y = Left.Y / Right.Y;
|
||||
@@ -881,6 +920,9 @@ static inline HMM_Vec4 HMM_DivV4F(HMM_Vec4 Left, float Right)
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
__m128 Scalar = _mm_set1_ps(Right);
|
||||
Result.SSE = _mm_div_ps(Left.SSE, Scalar);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
float32x4_t Scalar = vdupq_n_f32(Right);
|
||||
Result.NEON = vdivq_f32(Left.NEON, Scalar);
|
||||
#else
|
||||
Result.X = Left.X / Right;
|
||||
Result.Y = Left.Y / Right;
|
||||
@@ -943,6 +985,11 @@ static inline float HMM_DotV4(HMM_Vec4 Left, HMM_Vec4 Right)
|
||||
SSEResultTwo = _mm_shuffle_ps(SSEResultOne, SSEResultOne, _MM_SHUFFLE(0, 1, 2, 3));
|
||||
SSEResultOne = _mm_add_ps(SSEResultOne, SSEResultTwo);
|
||||
_mm_store_ss(&Result, SSEResultOne);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
float32x4_t NEONMultiplyResult = vmulq_f32(Left.NEON, Right.NEON);
|
||||
float32x4_t NEONHalfAdd = vpaddq_f32(NEONMultiplyResult, NEONMultiplyResult);
|
||||
float32x4_t NEONFullAdd = vpaddq_f32(NEONHalfAdd, NEONHalfAdd);
|
||||
Result = vgetq_lane_f32(NEONFullAdd, 0);
|
||||
#else
|
||||
Result = ((Left.X * Right.X) + (Left.Z * Right.Z)) + ((Left.Y * Right.Y) + (Left.W * Right.W));
|
||||
#endif
|
||||
@@ -1071,6 +1118,11 @@ static inline HMM_Vec4 HMM_LinearCombineV4M4(HMM_Vec4 Left, HMM_Mat4 Right)
|
||||
Result.SSE = _mm_add_ps(Result.SSE, _mm_mul_ps(_mm_shuffle_ps(Left.SSE, Left.SSE, 0x55), Right.Columns[1].SSE));
|
||||
Result.SSE = _mm_add_ps(Result.SSE, _mm_mul_ps(_mm_shuffle_ps(Left.SSE, Left.SSE, 0xaa), Right.Columns[2].SSE));
|
||||
Result.SSE = _mm_add_ps(Result.SSE, _mm_mul_ps(_mm_shuffle_ps(Left.SSE, Left.SSE, 0xff), Right.Columns[3].SSE));
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
Result.NEON = vmulq_laneq_f32(Right.Columns[0].NEON, Left.NEON, 0);
|
||||
Result.NEON = vfmaq_laneq_f32(Result.NEON, Right.Columns[1].NEON, Left.NEON, 1);
|
||||
Result.NEON = vfmaq_laneq_f32(Result.NEON, Right.Columns[2].NEON, Left.NEON, 2);
|
||||
Result.NEON = vfmaq_laneq_f32(Result.NEON, Right.Columns[3].NEON, Left.NEON, 3);
|
||||
#else
|
||||
Result.X = Left.Elements[0] * Right.Columns[0].X;
|
||||
Result.Y = Left.Elements[0] * Right.Columns[0].Y;
|
||||
@@ -1465,22 +1517,33 @@ static inline HMM_Mat4 HMM_TransposeM4(HMM_Mat4 Matrix)
|
||||
{
|
||||
ASSERT_COVERED(HMM_TransposeM4);
|
||||
|
||||
HMM_Mat4 Result = Matrix;
|
||||
HMM_Mat4 Result;
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
Result = Matrix;
|
||||
_MM_TRANSPOSE4_PS(Result.Columns[0].SSE, Result.Columns[1].SSE, Result.Columns[2].SSE, Result.Columns[3].SSE);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
float32x4x4_t Transposed = vld4q_f32((float*)Matrix.Columns);
|
||||
Result.Columns[0].NEON = Transposed.val[0];
|
||||
Result.Columns[1].NEON = Transposed.val[1];
|
||||
Result.Columns[2].NEON = Transposed.val[2];
|
||||
Result.Columns[3].NEON = Transposed.val[3];
|
||||
#else
|
||||
Result.Elements[0][0] = Matrix.Elements[0][0];
|
||||
Result.Elements[0][1] = Matrix.Elements[1][0];
|
||||
Result.Elements[0][2] = Matrix.Elements[2][0];
|
||||
Result.Elements[0][3] = Matrix.Elements[3][0];
|
||||
Result.Elements[1][0] = Matrix.Elements[0][1];
|
||||
Result.Elements[1][1] = Matrix.Elements[1][1];
|
||||
Result.Elements[1][2] = Matrix.Elements[2][1];
|
||||
Result.Elements[1][3] = Matrix.Elements[3][1];
|
||||
Result.Elements[2][1] = Matrix.Elements[1][2];
|
||||
Result.Elements[2][0] = Matrix.Elements[0][2];
|
||||
Result.Elements[2][1] = Matrix.Elements[1][2];
|
||||
Result.Elements[2][2] = Matrix.Elements[2][2];
|
||||
Result.Elements[2][3] = Matrix.Elements[3][2];
|
||||
Result.Elements[3][0] = Matrix.Elements[0][3];
|
||||
Result.Elements[3][1] = Matrix.Elements[1][3];
|
||||
Result.Elements[3][2] = Matrix.Elements[2][3];
|
||||
Result.Elements[3][0] = Matrix.Elements[0][3];
|
||||
Result.Elements[3][3] = Matrix.Elements[3][3];
|
||||
#endif
|
||||
|
||||
return Result;
|
||||
@@ -1493,29 +1556,10 @@ static inline HMM_Mat4 HMM_AddM4(HMM_Mat4 Left, HMM_Mat4 Right)
|
||||
|
||||
HMM_Mat4 Result;
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
Result.Columns[0].SSE = _mm_add_ps(Left.Columns[0].SSE, Right.Columns[0].SSE);
|
||||
Result.Columns[1].SSE = _mm_add_ps(Left.Columns[1].SSE, Right.Columns[1].SSE);
|
||||
Result.Columns[2].SSE = _mm_add_ps(Left.Columns[2].SSE, Right.Columns[2].SSE);
|
||||
Result.Columns[3].SSE = _mm_add_ps(Left.Columns[3].SSE, Right.Columns[3].SSE);
|
||||
#else
|
||||
Result.Elements[0][0] = Left.Elements[0][0] + Right.Elements[0][0];
|
||||
Result.Elements[0][1] = Left.Elements[0][1] + Right.Elements[0][1];
|
||||
Result.Elements[0][2] = Left.Elements[0][2] + Right.Elements[0][2];
|
||||
Result.Elements[0][3] = Left.Elements[0][3] + Right.Elements[0][3];
|
||||
Result.Elements[1][0] = Left.Elements[1][0] + Right.Elements[1][0];
|
||||
Result.Elements[1][1] = Left.Elements[1][1] + Right.Elements[1][1];
|
||||
Result.Elements[1][2] = Left.Elements[1][2] + Right.Elements[1][2];
|
||||
Result.Elements[1][3] = Left.Elements[1][3] + Right.Elements[1][3];
|
||||
Result.Elements[2][0] = Left.Elements[2][0] + Right.Elements[2][0];
|
||||
Result.Elements[2][1] = Left.Elements[2][1] + Right.Elements[2][1];
|
||||
Result.Elements[2][2] = Left.Elements[2][2] + Right.Elements[2][2];
|
||||
Result.Elements[2][3] = Left.Elements[2][3] + Right.Elements[2][3];
|
||||
Result.Elements[3][0] = Left.Elements[3][0] + Right.Elements[3][0];
|
||||
Result.Elements[3][1] = Left.Elements[3][1] + Right.Elements[3][1];
|
||||
Result.Elements[3][2] = Left.Elements[3][2] + Right.Elements[3][2];
|
||||
Result.Elements[3][3] = Left.Elements[3][3] + Right.Elements[3][3];
|
||||
#endif
|
||||
Result.Columns[0] = HMM_AddV4(Left.Columns[0], Right.Columns[0]);
|
||||
Result.Columns[1] = HMM_AddV4(Left.Columns[1], Right.Columns[1]);
|
||||
Result.Columns[2] = HMM_AddV4(Left.Columns[2], Right.Columns[2]);
|
||||
Result.Columns[3] = HMM_AddV4(Left.Columns[3], Right.Columns[3]);
|
||||
|
||||
return Result;
|
||||
}
|
||||
@@ -1527,29 +1571,10 @@ static inline HMM_Mat4 HMM_SubM4(HMM_Mat4 Left, HMM_Mat4 Right)
|
||||
|
||||
HMM_Mat4 Result;
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
Result.Columns[0].SSE = _mm_sub_ps(Left.Columns[0].SSE, Right.Columns[0].SSE);
|
||||
Result.Columns[1].SSE = _mm_sub_ps(Left.Columns[1].SSE, Right.Columns[1].SSE);
|
||||
Result.Columns[2].SSE = _mm_sub_ps(Left.Columns[2].SSE, Right.Columns[2].SSE);
|
||||
Result.Columns[3].SSE = _mm_sub_ps(Left.Columns[3].SSE, Right.Columns[3].SSE);
|
||||
#else
|
||||
Result.Elements[0][0] = Left.Elements[0][0] - Right.Elements[0][0];
|
||||
Result.Elements[0][1] = Left.Elements[0][1] - Right.Elements[0][1];
|
||||
Result.Elements[0][2] = Left.Elements[0][2] - Right.Elements[0][2];
|
||||
Result.Elements[0][3] = Left.Elements[0][3] - Right.Elements[0][3];
|
||||
Result.Elements[1][0] = Left.Elements[1][0] - Right.Elements[1][0];
|
||||
Result.Elements[1][1] = Left.Elements[1][1] - Right.Elements[1][1];
|
||||
Result.Elements[1][2] = Left.Elements[1][2] - Right.Elements[1][2];
|
||||
Result.Elements[1][3] = Left.Elements[1][3] - Right.Elements[1][3];
|
||||
Result.Elements[2][0] = Left.Elements[2][0] - Right.Elements[2][0];
|
||||
Result.Elements[2][1] = Left.Elements[2][1] - Right.Elements[2][1];
|
||||
Result.Elements[2][2] = Left.Elements[2][2] - Right.Elements[2][2];
|
||||
Result.Elements[2][3] = Left.Elements[2][3] - Right.Elements[2][3];
|
||||
Result.Elements[3][0] = Left.Elements[3][0] - Right.Elements[3][0];
|
||||
Result.Elements[3][1] = Left.Elements[3][1] - Right.Elements[3][1];
|
||||
Result.Elements[3][2] = Left.Elements[3][2] - Right.Elements[3][2];
|
||||
Result.Elements[3][3] = Left.Elements[3][3] - Right.Elements[3][3];
|
||||
#endif
|
||||
Result.Columns[0] = HMM_SubV4(Left.Columns[0], Right.Columns[0]);
|
||||
Result.Columns[1] = HMM_SubV4(Left.Columns[1], Right.Columns[1]);
|
||||
Result.Columns[2] = HMM_SubV4(Left.Columns[2], Right.Columns[2]);
|
||||
Result.Columns[3] = HMM_SubV4(Left.Columns[3], Right.Columns[3]);
|
||||
|
||||
return Result;
|
||||
}
|
||||
@@ -1574,13 +1599,19 @@ static inline HMM_Mat4 HMM_MulM4F(HMM_Mat4 Matrix, float Scalar)
|
||||
ASSERT_COVERED(HMM_MulM4F);
|
||||
|
||||
HMM_Mat4 Result;
|
||||
|
||||
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
__m128 SSEScalar = _mm_set1_ps(Scalar);
|
||||
Result.Columns[0].SSE = _mm_mul_ps(Matrix.Columns[0].SSE, SSEScalar);
|
||||
Result.Columns[1].SSE = _mm_mul_ps(Matrix.Columns[1].SSE, SSEScalar);
|
||||
Result.Columns[2].SSE = _mm_mul_ps(Matrix.Columns[2].SSE, SSEScalar);
|
||||
Result.Columns[3].SSE = _mm_mul_ps(Matrix.Columns[3].SSE, SSEScalar);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
Result.Columns[0].NEON = vmulq_n_f32(Matrix.Columns[0].NEON, Scalar);
|
||||
Result.Columns[1].NEON = vmulq_n_f32(Matrix.Columns[1].NEON, Scalar);
|
||||
Result.Columns[2].NEON = vmulq_n_f32(Matrix.Columns[2].NEON, Scalar);
|
||||
Result.Columns[3].NEON = vmulq_n_f32(Matrix.Columns[3].NEON, Scalar);
|
||||
#else
|
||||
Result.Elements[0][0] = Matrix.Elements[0][0] * Scalar;
|
||||
Result.Elements[0][1] = Matrix.Elements[0][1] * Scalar;
|
||||
@@ -1623,6 +1654,12 @@ static inline HMM_Mat4 HMM_DivM4F(HMM_Mat4 Matrix, float Scalar)
|
||||
Result.Columns[1].SSE = _mm_div_ps(Matrix.Columns[1].SSE, SSEScalar);
|
||||
Result.Columns[2].SSE = _mm_div_ps(Matrix.Columns[2].SSE, SSEScalar);
|
||||
Result.Columns[3].SSE = _mm_div_ps(Matrix.Columns[3].SSE, SSEScalar);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
float32x4_t NEONScalar = vdupq_n_f32(Scalar);
|
||||
Result.Columns[0].NEON = vdivq_f32(Matrix.Columns[0].NEON, NEONScalar);
|
||||
Result.Columns[1].NEON = vdivq_f32(Matrix.Columns[1].NEON, NEONScalar);
|
||||
Result.Columns[2].NEON = vdivq_f32(Matrix.Columns[2].NEON, NEONScalar);
|
||||
Result.Columns[3].NEON = vdivq_f32(Matrix.Columns[3].NEON, NEONScalar);
|
||||
#else
|
||||
Result.Elements[0][0] = Matrix.Elements[0][0] / Scalar;
|
||||
Result.Elements[0][1] = Matrix.Elements[0][1] / Scalar;
|
||||
@@ -2065,6 +2102,9 @@ static inline HMM_Quat HMM_Q(float X, float Y, float Z, float W)
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
Result.SSE = _mm_setr_ps(X, Y, Z, W);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
float32x4_t v = { X, Y, Z, W };
|
||||
Result.NEON = v;
|
||||
#else
|
||||
Result.X = X;
|
||||
Result.Y = Y;
|
||||
@@ -2084,6 +2124,8 @@ static inline HMM_Quat HMM_QV4(HMM_Vec4 Vector)
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
Result.SSE = Vector.SSE;
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
Result.NEON = Vector.NEON;
|
||||
#else
|
||||
Result.X = Vector.X;
|
||||
Result.Y = Vector.Y;
|
||||
@@ -2103,6 +2145,8 @@ static inline HMM_Quat HMM_AddQ(HMM_Quat Left, HMM_Quat Right)
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
Result.SSE = _mm_add_ps(Left.SSE, Right.SSE);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
Result.NEON = vaddq_f32(Left.NEON, Right.NEON);
|
||||
#else
|
||||
|
||||
Result.X = Left.X + Right.X;
|
||||
@@ -2123,6 +2167,8 @@ static inline HMM_Quat HMM_SubQ(HMM_Quat Left, HMM_Quat Right)
|
||||
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
Result.SSE = _mm_sub_ps(Left.SSE, Right.SSE);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
Result.NEON = vsubq_f32(Left.NEON, Right.NEON);
|
||||
#else
|
||||
Result.X = Left.X - Right.X;
|
||||
Result.Y = Left.Y - Right.Y;
|
||||
@@ -2156,6 +2202,19 @@ static inline HMM_Quat HMM_MulQ(HMM_Quat Left, HMM_Quat Right)
|
||||
SSEResultOne = _mm_shuffle_ps(Left.SSE, Left.SSE, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
SSEResultTwo = _mm_shuffle_ps(Right.SSE, Right.SSE, _MM_SHUFFLE(3, 2, 1, 0));
|
||||
Result.SSE = _mm_add_ps(SSEResultThree, _mm_mul_ps(SSEResultTwo, SSEResultOne));
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
float32x4_t Right1032 = vrev64q_f32(Right.NEON);
|
||||
float32x4_t Right3210 = vcombine_f32(vget_high_f32(Right1032), vget_low_f32(Right1032));
|
||||
float32x4_t Right2301 = vrev64q_f32(Right3210);
|
||||
|
||||
float32x4_t FirstSign = {1.0f, -1.0f, 1.0f, -1.0f};
|
||||
Result.NEON = vmulq_f32(Right3210, vmulq_f32(vdupq_laneq_f32(Left.NEON, 0), FirstSign));
|
||||
float32x4_t SecondSign = {1.0f, 1.0f, -1.0f, -1.0f};
|
||||
Result.NEON = vfmaq_f32(Result.NEON, Right2301, vmulq_f32(vdupq_laneq_f32(Left.NEON, 1), SecondSign));
|
||||
float32x4_t ThirdSign = {-1.0f, 1.0f, 1.0f, -1.0f};
|
||||
Result.NEON = vfmaq_f32(Result.NEON, Right1032, vmulq_f32(vdupq_laneq_f32(Left.NEON, 2), ThirdSign));
|
||||
Result.NEON = vfmaq_laneq_f32(Result.NEON, Right.NEON, Left.NEON, 3);
|
||||
|
||||
#else
|
||||
Result.X = Right.Elements[3] * +Left.Elements[0];
|
||||
Result.Y = Right.Elements[2] * -Left.Elements[0];
|
||||
@@ -2191,6 +2250,8 @@ static inline HMM_Quat HMM_MulQF(HMM_Quat Left, float Multiplicative)
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
__m128 Scalar = _mm_set1_ps(Multiplicative);
|
||||
Result.SSE = _mm_mul_ps(Left.SSE, Scalar);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
Result.NEON = vmulq_n_f32(Left.NEON, Multiplicative);
|
||||
#else
|
||||
Result.X = Left.X * Multiplicative;
|
||||
Result.Y = Left.Y * Multiplicative;
|
||||
@@ -2211,6 +2272,9 @@ static inline HMM_Quat HMM_DivQF(HMM_Quat Left, float Divnd)
|
||||
#ifdef HANDMADE_MATH__USE_SSE
|
||||
__m128 Scalar = _mm_set1_ps(Divnd);
|
||||
Result.SSE = _mm_div_ps(Left.SSE, Scalar);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
float32x4_t Scalar = vdupq_n_f32(Divnd);
|
||||
Result.NEON = vdivq_f32(Left.NEON, Scalar);
|
||||
#else
|
||||
Result.X = Left.X / Divnd;
|
||||
Result.Y = Left.Y / Divnd;
|
||||
@@ -2235,6 +2299,11 @@ static inline float HMM_DotQ(HMM_Quat Left, HMM_Quat Right)
|
||||
SSEResultTwo = _mm_shuffle_ps(SSEResultOne, SSEResultOne, _MM_SHUFFLE(0, 1, 2, 3));
|
||||
SSEResultOne = _mm_add_ps(SSEResultOne, SSEResultTwo);
|
||||
_mm_store_ss(&Result, SSEResultOne);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
float32x4_t NEONMultiplyResult = vmulq_f32(Left.NEON, Right.NEON);
|
||||
float32x4_t NEONHalfAdd = vpaddq_f32(NEONMultiplyResult, NEONMultiplyResult);
|
||||
float32x4_t NEONFullAdd = vpaddq_f32(NEONHalfAdd, NEONHalfAdd);
|
||||
Result = vgetq_lane_f32(NEONFullAdd, 0);
|
||||
#else
|
||||
Result = ((Left.X * Right.X) + (Left.Z * Right.Z)) + ((Left.Y * Right.Y) + (Left.W * Right.W));
|
||||
#endif
|
||||
@@ -2278,6 +2347,10 @@ static inline HMM_Quat _HMM_MixQ(HMM_Quat Left, float MixLeft, HMM_Quat Right, f
|
||||
__m128 SSEResultOne = _mm_mul_ps(Left.SSE, ScalarLeft);
|
||||
__m128 SSEResultTwo = _mm_mul_ps(Right.SSE, ScalarRight);
|
||||
Result.SSE = _mm_add_ps(SSEResultOne, SSEResultTwo);
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
float32x4_t ScaledLeft = vmulq_n_f32(Left.NEON, MixLeft);
|
||||
float32x4_t ScaledRight = vmulq_n_f32(Right.NEON, MixRight);
|
||||
Result.NEON = vaddq_f32(ScaledLeft, ScaledRight);
|
||||
#else
|
||||
Result.X = Left.X*MixLeft + Right.X*MixRight;
|
||||
Result.Y = Left.Y*MixLeft + Right.Y*MixRight;
|
||||
@@ -3695,6 +3768,9 @@ static inline HMM_Vec4 operator-(HMM_Vec4 In)
|
||||
HMM_Vec4 Result;
|
||||
#if HANDMADE_MATH__USE_SSE
|
||||
Result.SSE = _mm_xor_ps(In.SSE, _mm_set1_ps(-0.0f));
|
||||
#elif defined(HANDMADE_MATH__USE_NEON)
|
||||
float32x4_t Zero = vdupq_n_f32(0.0f);
|
||||
Result.NEON = vsubq_f32(Zero, In.NEON);
|
||||
#else
|
||||
Result.X = -In.X;
|
||||
Result.Y = -In.Y;
|
||||
|
@@ -2,9 +2,9 @@ BUILD_DIR=./build
|
||||
|
||||
CXXFLAGS+=-g -Wall -Wextra -pthread -Wno-missing-braces -Wno-missing-field-initializers -Wfloat-equal
|
||||
|
||||
all: c c_no_sse cpp cpp_no_sse build_c_without_coverage build_cpp_without_coverage
|
||||
all: c c_no_simd cpp cpp_no_simd build_c_without_coverage build_cpp_without_coverage
|
||||
|
||||
build_all: build_c build_c_no_sse build_cpp build_cpp_no_sse
|
||||
build_all: build_c build_c_no_simd build_cpp build_cpp_no_simd
|
||||
|
||||
clean:
|
||||
rm -rf $(BUILD_DIR)
|
||||
@@ -21,18 +21,18 @@ build_c: HandmadeMath.c test_impl
|
||||
-lm \
|
||||
&& $(CC) -ohmm_test_c HandmadeMath.o hmm_test.o -lm
|
||||
|
||||
c_no_sse: build_c_no_sse
|
||||
$(BUILD_DIR)/hmm_test_c_no_sse
|
||||
c_no_simd: build_c_no_simd
|
||||
$(BUILD_DIR)/hmm_test_c_no_simd
|
||||
|
||||
build_c_no_sse: HandmadeMath.c test_impl
|
||||
@echo "\nCompiling in C mode (no SSE)"
|
||||
build_c_no_simd: HandmadeMath.c test_impl
|
||||
@echo "\nCompiling in C mode (no SIMD)"
|
||||
mkdir -p $(BUILD_DIR)
|
||||
cd $(BUILD_DIR) \
|
||||
&& $(CC) $(CPPFLAGS) $(CXXFLAGS) -std=c99 \
|
||||
-DHANDMADE_MATH_NO_SSE \
|
||||
-DHANDMADE_MATH_NO_SIMD \
|
||||
-c ../HandmadeMath.c ../hmm_test.c \
|
||||
-lm \
|
||||
&& $(CC) -ohmm_test_c_no_sse HandmadeMath.o hmm_test.o -lm
|
||||
&& $(CC) -ohmm_test_c_no_simd HandmadeMath.o hmm_test.o -lm
|
||||
|
||||
cpp: build_cpp
|
||||
$(BUILD_DIR)/hmm_test_cpp
|
||||
@@ -45,15 +45,15 @@ build_cpp: HandmadeMath.cpp test_impl
|
||||
-DHANDMADE_MATH_CPP_MODE \
|
||||
../HandmadeMath.cpp ../hmm_test.cpp
|
||||
|
||||
cpp_no_sse: build_cpp_no_sse
|
||||
$(BUILD_DIR)/hmm_test_cpp_no_sse
|
||||
cpp_no_simd: build_cpp_no_simd
|
||||
$(BUILD_DIR)/hmm_test_cpp_no_simd
|
||||
|
||||
build_cpp_no_sse: HandmadeMath.cpp test_impl
|
||||
@echo "\nCompiling in C++ mode (no SSE)"
|
||||
build_cpp_no_simd: HandmadeMath.cpp test_impl
|
||||
@echo "\nCompiling in C++ mode (no SIMD)"
|
||||
mkdir -p $(BUILD_DIR)
|
||||
cd $(BUILD_DIR) \
|
||||
&& $(CXX) $(CPPFLAGS) $(CXXFLAGS) -ohmm_test_cpp_no_sse \
|
||||
-DHANDMADE_MATH_CPP_MODE -DHANDMADE_MATH_NO_SSE \
|
||||
&& $(CXX) $(CPPFLAGS) $(CXXFLAGS) -ohmm_test_cpp_no_simd \
|
||||
-DHANDMADE_MATH_CPP_MODE -DHANDMADE_MATH_NO_SIMD \
|
||||
../HandmadeMath.cpp ../hmm_test.cpp
|
||||
|
||||
test_impl: hmm_test.cpp hmm_test.c
|
||||
@@ -69,10 +69,10 @@ build_c_without_coverage: HandmadeMath.c test_impl
|
||||
&& $(CC) -ohmm_test_c HandmadeMath.o hmm_test.o -lm
|
||||
|
||||
build_cpp_without_coverage: HandmadeMath.cpp test_impl
|
||||
@echo "\nCompiling in C++ mode (no SSE)"
|
||||
@echo "\nCompiling in C++ mode (no SIMD)"
|
||||
mkdir -p $(BUILD_DIR)
|
||||
cd $(BUILD_DIR) \
|
||||
&& $(CXX) $(CPPFLAGS) $(CXXFLAGS) -ohmm_test_cpp_no_sse \
|
||||
&& $(CXX) $(CPPFLAGS) $(CXXFLAGS) -ohmm_test_cpp_no_simd \
|
||||
-DHANDMADE_MATH_CPP_MODE -DWITHOUT_COVERAGE \
|
||||
../HandmadeMath.cpp ../hmm_test.cpp
|
||||
|
||||
|
Reference in New Issue
Block a user