From a3376acc2e2fc940a909659f3d95fb199a548194 Mon Sep 17 00:00:00 2001 From: Cameron Cawley Date: Thu, 14 May 2026 14:50:21 +0100 Subject: [PATCH] Replace custom NEON cast macros with vreinterpret_*_* --- src/video/SDL_stretch.c | 48 +++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/src/video/SDL_stretch.c b/src/video/SDL_stretch.c index d432c0fbbd..073f62c09c 100644 --- a/src/video/SDL_stretch.c +++ b/src/video/SDL_stretch.c @@ -375,20 +375,6 @@ static bool scale_mat(const Uint32 *src, int src_w, int src_h, int src_pitch, Ui return true; } -#ifdef SDL_NEON_INTRINSICS -#define CAST_uint8x8_t (uint8x8_t) -#define CAST_uint32x2_t (uint32x2_t) -#endif - -#if defined(_MSC_VER) -#ifdef SDL_NEON_INTRINSICS -#undef CAST_uint8x8_t -#undef CAST_uint32x2_t -#define CAST_uint8x8_t -#define CAST_uint32x2_t -#endif -#endif - #ifdef SDL_SSE2_INTRINSICS #if 0 @@ -587,8 +573,8 @@ static SDL_INLINE void INTERPOL_BILINEAR_NEON(const Uint32 *s0, const Uint32 *s1 uint16x8_t d0; uint8x8_t e0; - x_00_01 = CAST_uint8x8_t vld1_u32(s0); // Load 2 pixels - x_10_11 = CAST_uint8x8_t vld1_u32(s1); + x_00_01 = vreinterpret_u8_u32(vld1_u32(s0)); // Load 2 pixels + x_10_11 = vreinterpret_u8_u32(vld1_u32(s1)); /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */ k0 = vmull_u8(x_00_01, v_frac_h1); /* k0 := x0 * (1 - frac) */ @@ -608,7 +594,7 @@ static SDL_INLINE void INTERPOL_BILINEAR_NEON(const Uint32 *s0, const Uint32 *s1 e0 = vmovn_u16(d0); // Store 1 pixel - *dst = vget_lane_u32(CAST_uint32x2_t e0, 0); + *dst = vget_lane_u32(vreinterpret_u32_u8(e0), 0); } static bool scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitch, Uint32 *dst, int dst_w, int dst_h, int dst_pitch) @@ -672,14 +658,14 @@ static bool scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitc s_16_17 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_3); // Interpolation vertical - x_00_01 = CAST_uint8x8_t vld1_u32(s_00_01); // Load 2 pixels - x_02_03 = CAST_uint8x8_t vld1_u32(s_02_03); - x_04_05 = CAST_uint8x8_t vld1_u32(s_04_05); - x_06_07 = CAST_uint8x8_t vld1_u32(s_06_07); - x_10_11 = CAST_uint8x8_t vld1_u32(s_10_11); - x_12_13 = CAST_uint8x8_t vld1_u32(s_12_13); - x_14_15 = CAST_uint8x8_t vld1_u32(s_14_15); - x_16_17 = CAST_uint8x8_t vld1_u32(s_16_17); + x_00_01 = vreinterpret_u8_u32(vld1_u32(s_00_01)); // Load 2 pixels + x_02_03 = vreinterpret_u8_u32(vld1_u32(s_02_03)); + x_04_05 = vreinterpret_u8_u32(vld1_u32(s_04_05)); + x_06_07 = vreinterpret_u8_u32(vld1_u32(s_06_07)); + x_10_11 = vreinterpret_u8_u32(vld1_u32(s_10_11)); + x_12_13 = vreinterpret_u8_u32(vld1_u32(s_12_13)); + x_14_15 = vreinterpret_u8_u32(vld1_u32(s_14_15)); + x_16_17 = vreinterpret_u8_u32(vld1_u32(s_16_17)); /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */ k0 = vmull_u8(x_00_01, v_frac_h1); /* k0 := x0 * (1 - frac) */ @@ -729,7 +715,7 @@ static bool scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitc // Narrow again e1 = vmovn_u16(d1); - f0 = vcombine_u32(CAST_uint32x2_t e0, CAST_uint32x2_t e1); + f0 = vcombine_u32(vreinterpret_u32_u8(e0), vreinterpret_u32_u8(e1)); // Store 4 pixels vst1q_u32(dst, f0); @@ -768,10 +754,10 @@ static bool scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitc s_12_13 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_1); // Interpolation vertical - x_00_01 = CAST_uint8x8_t vld1_u32(s_00_01); // Load 2 pixels - x_02_03 = CAST_uint8x8_t vld1_u32(s_02_03); - x_10_11 = CAST_uint8x8_t vld1_u32(s_10_11); - x_12_13 = CAST_uint8x8_t vld1_u32(s_12_13); + x_00_01 = vreinterpret_u8_u32(vld1_u32(s_00_01)); // Load 2 pixels + x_02_03 = vreinterpret_u8_u32(vld1_u32(s_02_03)); + x_10_11 = vreinterpret_u8_u32(vld1_u32(s_10_11)); + x_12_13 = vreinterpret_u8_u32(vld1_u32(s_12_13)); /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */ k0 = vmull_u8(x_00_01, v_frac_h1); /* k0 := x0 * (1 - frac) */ @@ -801,7 +787,7 @@ static bool scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitc e0 = vmovn_u16(d0); // Store 2 pixels - vst1_u32(dst, CAST_uint32x2_t e0); + vst1_u32(dst, vreinterpret_u32_u8(e0)); dst += 2; }