mirror of
https://github.com/raysan5/raylib.git
synced 2025-11-12 05:18:49 +00:00
ADDED: Sample code for RISC-V vector (RVV) instructions -WIP-
This commit is contained in:
40
src/external/rlsw.h
vendored
40
src/external/rlsw.h
vendored
@@ -674,11 +674,16 @@ SWAPI void swBindTexture(uint32_t id);
|
|||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __riscv_vector
|
||||||
|
#define SW_HAS_RVV
|
||||||
|
#include <riscv_vector.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
//----------------------------------------------------------------------------------
|
//----------------------------------------------------------------------------------
|
||||||
// Defines and Macros
|
// Defines and Macros
|
||||||
//----------------------------------------------------------------------------------
|
//----------------------------------------------------------------------------------
|
||||||
#define SW_PI 3.14159265358979323846f
|
#define SW_PI 3.14159265358979323846f
|
||||||
#define SW_INV_255 0.00392156862745098f
|
#define SW_INV_255 0.00392156862745098f // 1.0f/255.0f
|
||||||
#define SW_DEG2RAD (SW_PI/180.0f)
|
#define SW_DEG2RAD (SW_PI/180.0f)
|
||||||
#define SW_RAD2DEG (180.0f/SW_PI)
|
#define SW_RAD2DEG (180.0f/SW_PI)
|
||||||
|
|
||||||
@@ -1102,6 +1107,27 @@ static inline void sw_float_to_unorm8_simd(uint8_t dst[4], const float src[4])
|
|||||||
clamped = _mm_packs_epi32(clamped, clamped); // s32 -> s16 (saturated)
|
clamped = _mm_packs_epi32(clamped, clamped); // s32 -> s16 (saturated)
|
||||||
clamped = _mm_packus_epi16(clamped, clamped); // s16 -> u8 (saturated < 0 to 0)
|
clamped = _mm_packus_epi16(clamped, clamped); // s16 -> u8 (saturated < 0 to 0)
|
||||||
*(uint32_t*)dst = _mm_cvtsi128_si32(clamped);
|
*(uint32_t*)dst = _mm_cvtsi128_si32(clamped);
|
||||||
|
#elif defined(SW_HAS_RVV)
|
||||||
|
// TODO: Sample code generated by AI, needs testing and review
|
||||||
|
size_t vl = vsetvl_e32m1(4); // Load up to 4 floats into a vector register
|
||||||
|
vfloat32m1_t vsrc = vle32_v_f32m1(src, vl); // Load float32 values
|
||||||
|
|
||||||
|
// Clamp to [0.0f, 1.0f]
|
||||||
|
vfloat32m1_t vzero = vfmv_v_f_f32m1(0.0f, vl);
|
||||||
|
vfloat32m1_t vone = vfmv_v_f_f32m1(1.0f, vl);
|
||||||
|
vsrc = vfmin_vv_f32m1(vsrc, vone, vl);
|
||||||
|
vsrc = vfmax_vv_f32m1(vsrc, vzero, vl);
|
||||||
|
|
||||||
|
// Multiply by 255.0f and add 0.5f for rounding
|
||||||
|
vfloat32m1_t vscaled = vfmul_vf_f32m1(vsrc, 255.0f, vl);
|
||||||
|
vscaled = vfadd_vf_f32m1(vscaled, 0.5f, vl);
|
||||||
|
|
||||||
|
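// Convert to unsigned integer (NOTE: vfcvt_xu_f_v rounds using the dynamic rounding mode in frm; only the vfcvt_rtz_xu_f_v variant truncates toward zero)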
|
||||||
|
vuint32m1_t vu32 = vfcvt_xu_f_v_u32m1(vscaled, vl);
|
||||||
|
|
||||||
|
// Narrow from u32 -> u8
|
||||||
|
vuint8m1_t vu8 = vnclipu_wx_u8m1(vu32, 0, vl); // Round toward zero
|
||||||
|
vse8_v_u8m1(dst, vu8, vl); // Store result
|
||||||
#else
|
#else
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
@@ -1135,6 +1161,14 @@ static inline void sw_float_from_unorm8_simd(float dst[4], const uint8_t src[4])
|
|||||||
__m128 floats = _mm_cvtepi32_ps(ints);
|
__m128 floats = _mm_cvtepi32_ps(ints);
|
||||||
floats = _mm_mul_ps(floats, _mm_set1_ps(SW_INV_255));
|
floats = _mm_mul_ps(floats, _mm_set1_ps(SW_INV_255));
|
||||||
_mm_storeu_ps(dst, floats);
|
_mm_storeu_ps(dst, floats);
|
||||||
|
#elif defined(SW_HAS_RVV)
|
||||||
|
// TODO: Sample code generated by AI, needs testing and review
|
||||||
|
size_t vl = vsetvl_e8m1(4); // Set vector length for 8-bit input elements
|
||||||
|
vuint8m1_t vsrc_u8 = vle8_v_u8m1(src, vl); // Load 4 unsigned 8-bit integers
|
||||||
|
vuint32m1_t vsrc_u32 = vwcvt_xu_u_v_u32m1(vsrc_u8, vl); // Widen to 32-bit unsigned integers
|
||||||
|
vfloat32m1_t vsrc_f32 = vfcvt_f_xu_v_f32m1(vsrc_u32, vl); // Convert to float32
|
||||||
|
vfloat32m1_t vnorm = vfmul_vf_f32m1(vsrc_f32, SW_INV_255, vl); // Multiply by 1/255.0 to normalize
|
||||||
|
vse32_v_f32m1(dst, vnorm, vl); // Store result
|
||||||
#else
|
#else
|
||||||
dst[0] = (float)src[0]*SW_INV_255;
|
dst[0] = (float)src[0]*SW_INV_255;
|
||||||
dst[1] = (float)src[1]*SW_INV_255;
|
dst[1] = (float)src[1]*SW_INV_255;
|
||||||
@@ -2672,8 +2706,8 @@ static inline void FUNC_NAME(void)
|
|||||||
float ySubstep = 1.0f - sw_fract(v0->screen[1]); \
|
float ySubstep = 1.0f - sw_fract(v0->screen[1]); \
|
||||||
\
|
\
|
||||||
/* Calculation of vertex gradients in X and Y */ \
|
/* Calculation of vertex gradients in X and Y */ \
|
||||||
float dUdx, dVdx; \
|
float dUdx = 0.0f, dVdx = 0.0f; \
|
||||||
float dUdy, dVdy; \
|
float dUdy = 0.0f, dVdy = 0.0f; \
|
||||||
if (ENABLE_TEXTURE) { \
|
if (ENABLE_TEXTURE) { \
|
||||||
dUdx = (v1->texcoord[0] - v0->texcoord[0])*wRcp; \
|
dUdx = (v1->texcoord[0] - v0->texcoord[0])*wRcp; \
|
||||||
dVdx = (v1->texcoord[1] - v0->texcoord[1])*wRcp; \
|
dVdx = (v1->texcoord[1] - v0->texcoord[1])*wRcp; \
|
||||||
|
|||||||
Reference in New Issue
Block a user