adds sdl_sve_chn_blend_with_mask_fast for RGB565 alpha-blending

This commit is contained in:
Gabriel Wang
2026-05-18 14:32:10 +08:00
committed by Sam Lantinga
parent 8d9d6b1b2b
commit e07cfdff2c
2 changed files with 28 additions and 7 deletions

View File

@@ -51,12 +51,12 @@
}
#undef sdl_sve_rgb32_blend_to_rgb565_op
#define sdl_sve_rgb32_blend_to_rgb565_op(ma_alpha_chn_idx) \
do { \
svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); \
sve_target_u16 = sdl_sve_chn_blend_with_mask(sve_source_u16, \
sve_target_u16, \
vMask); \
#define sdl_sve_rgb32_blend_to_rgb565_op(ma_alpha_chn_idx) \
do { \
svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); \
sve_target_u16 = sdl_sve_chn_blend_with_mask_fast(sve_source_u16, \
sve_target_u16, \
vMask); \
} while (0)
#include "SDL_sve2_swizzle.h"

View File

@@ -902,7 +902,9 @@ static inline void svst4ub_u16(svbool_t vPredu8,
/*! \note the Element range of vMask is [0, 0xFF]
*/
SDL_TARGETING("arch=armv8-a+sve2")
static inline svuint16_t sdl_sve_chn_blend_with_mask(svuint16_t vSource, svuint16_t vTarget, svuint16_t vMask)
static inline svuint16_t sdl_sve_chn_blend_with_mask(svuint16_t vSource,
svuint16_t vTarget,
svuint16_t vMask)
{
// vTarget = vSource * vMask + vTarget * (255 - vMask);
svuint16_t vTemp0 = svmul_u16_m(svptrue_b16(), vSource, vMask);
@@ -924,6 +926,25 @@ static inline svuint16_t sdl_sve_chn_blend_with_mask(svuint16_t vSource, svuint1
return svlsr_n_u16_m(svptrue_b16(), vTemp0, 8); // vTarget >> 8;
}
/*! \brief blend every 16-bit element of vSource into vTarget using the
 *         per-element weight in vMask, scaling the sum back with a
 *         right shift by 8
 *
 *  \param vSource elements weighted by vMask
 *  \param vTarget elements weighted by (255 - vMask)
 *  \param vMask   per-element blend weight
 *  \return (vSource * vMask + vTarget * (255 - vMask)) >> 8 for each element
 *
 *  \note each element of vMask must be within [0, 0xFF]
 */
SDL_TARGETING("arch=armv8-a+sve2")
static inline svuint16_t sdl_sve_chn_blend_with_mask_fast(svuint16_t vSource,
                                                          svuint16_t vTarget,
                                                          svuint16_t vMask)
{
    /* weight applied to the target: 255 - vMask */
    svuint16_t vInvMask = svsub_u16_m(svptrue_b16(), svdup_u16(255), vMask);

    /* source contribution: vSource * vMask */
    svuint16_t vAccum = svmul_u16_m(svptrue_b16(), vSource, vMask);

    /* add the target contribution: vAccum += vTarget * vInvMask */
    vAccum = svmla_u16_m(svptrue_b16(), vAccum, vTarget, vInvMask);

    /* scale the weighted sum back down: vAccum >> 8 */
    return svlsr_n_u16_m(svptrue_b16(), vAccum, 8);
}
/*! \note the range of hwOpacity is [0, 0x100]
*/
SDL_TARGETING("arch=armv8-a+sve2")