adds Blit565to565SurfaceAlphaSVE2

This commit is contained in:
Gabriel Wang
2026-05-18 15:31:22 +08:00
committed by Sam Lantinga
parent e07cfdff2c
commit 508450e9c0
4 changed files with 121 additions and 0 deletions

View File

@@ -1570,6 +1570,11 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
if (SDL_HasMMX()) {
return Blit565to565SurfaceAlphaMMX;
} else
#endif
#ifdef SDL_SVE2_INTRINSICS
if (SDL_HasSVE2()) {
return Blit565to565SurfaceAlphaSVE2;
} else
#endif
{
return Blit565to565SurfaceAlpha;

View File

@@ -86,4 +86,101 @@ size_t SDL_GetSVEVectorSize(void)
return svlen(svundef_u8()) * 8;
}
/*-----------------------------------------------------------------------------*
* RGB565 Blend with Surface Alpha *
*-----------------------------------------------------------------------------*/
/*! \brief blend one run of RGB565 pixels onto another with a uniform opacity
 *  \param phwSource pixels to blend from; only read here
 *  \param phwTarget pixels to blend onto; overwritten with the blended result
 *  \param uStride   forwarded to sdl_sve_stride_loop_rgb16()
 *                   NOTE(review): presumably the number of pixels to process - confirm
 *  \param hwOpacity forwarded unchanged to sdl_sve_chn_blend_with_opacity_fast()
 */
SDL_TARGETING("arch=armv8-a+sve2")
ARM_NONNULL(1, 2)
static inline void sdl_sve_rgb565_stride_blend_with_opacity(uint16_t *SDL_RESTRICT phwSource,
                                                            uint16_t *SDL_RESTRICT phwTarget,
                                                            size_t uStride,
                                                            uint16_t hwOpacity)
{
    sdl_sve_stride_loop_rgb16(uStride, vTailPred)
    {
        // fetch the packed pixels selected by this iteration's predicate
        svuint16_t vSourcePacked = svld1_u16(vTailPred, phwSource);
        svuint16_t vTargetPacked = svld1_u16(vTailPred, phwTarget);

        // split each packed vector into its three channel vectors
        svuint16x3_t vSourceChannels = sdl_sve_rgb565_unpack(vSourcePacked);
        svuint16x3_t vTargetChannels = sdl_sve_rgb565_unpack(vTargetPacked);

        // blend every channel of the target with the matching source channel
        sdl_sve_pixel_ccc_foreach_chn(
            vSourceChannels,
            vTargetChannels,
            {
                sve_target_u16 = sdl_sve_chn_blend_with_opacity_fast(
                    sve_source_u16,
                    sve_target_u16,
                    hwOpacity);
            });

        // repack the blended channels and write them back under the same predicate
        svuint16_t vBlendedPacked = sdl_sve_rgb565_pack(vTargetChannels);
        svst1_u16(vTailPred, phwTarget, vBlendedPacked);

        phwTarget += sve_iteration_advance;
        phwSource += sve_iteration_advance;
    }
}
/*! \brief blend a rectangular RGB565 region onto another RGB565 region using
 *         one opacity value for the whole region
 *  \param pchSource     first byte of the source region; must be 2-byte aligned
 *  \param uSourceStride byte offset added to pchSource after each row
 *  \param pchTarget     first byte of the target region; must be 2-byte aligned
 *  \param uTargetStride byte offset added to pchTarget after each row
 *  \param nWidth        per-row count handed to the row blender
 *  \param nHeight       number of rows to process
 *  \param hwOpacity     opacity in the range [0, 0x100]; an input of 255 is
 *                       promoted to 0x100 so that it selects the source only
 *  \note nothing is read or written when nWidth or nHeight is not positive
 */
SDL_TARGETING("arch=armv8-a+sve2")
ARM_NONNULL(1, 3)
static inline void sdl_sve_rgb565_blend_with_opacity(uint8_t *SDL_RESTRICT pchSource,
                                                     size_t uSourceStride,
                                                     uint8_t *SDL_RESTRICT pchTarget,
                                                     size_t uTargetStride,
                                                     int nWidth,
                                                     int nHeight,
                                                     uint16_t hwOpacity)
{
    // the rows are accessed as uint16_t, so both bases must be 2-byte aligned
    assert(0 == ((uintptr_t)pchSource & 0x01));
    assert(0 == ((uintptr_t)pchTarget & 0x01));

    // A negative height would keep a "while (nHeight--)" loop running far past
    // the buffers, and a negative width would turn into a huge size_t below.
    if (nWidth <= 0 || nHeight <= 0) {
        return;
    }

    // map 255 to 256 so that the final ">> 8" of the blend yields the source
    hwOpacity += hwOpacity == 255;

    for (int nRow = 0; nRow < nHeight; nRow++) {
        sdl_sve_rgb565_stride_blend_with_opacity((uint16_t *)pchSource,
                                                 (uint16_t *)pchTarget,
                                                 (size_t)nWidth,
                                                 hwOpacity);
        pchSource += uSourceStride;
        pchTarget += uTargetStride;
    }
}
// SVE2 blitter: RGB565 -> RGB565 blend using the alpha value stored in the blit info.
// Both pixel formats are required (by assertion) to use 2 bytes per pixel.
SDL_TARGETING("arch=armv8-a+sve2")
void Blit565to565SurfaceAlphaSVE2(SDL_BlitInfo *info)
{
    const int srcbpp = info->src_fmt->bytes_per_pixel;
    const int dstbpp = info->dst_fmt->bytes_per_pixel;

    assert(srcbpp == 2);
    assert(dstbpp == 2);

    const int columns = info->dst_w;
    const int rows = info->dst_h;

    // bytes from the start of one row to the start of the next:
    // the pixels of the row plus the skip recorded in the blit info
    const int src_row_bytes = info->src_skip + srcbpp * columns;
    const int dst_row_bytes = info->dst_skip + dstbpp * columns;

    uint8_t *src_pixels = info->src;
    uint8_t *dst_pixels = info->dst;
    const uint16_t opacity = info->a;

    sdl_sve_rgb565_blend_with_opacity(src_pixels,
                                      src_row_bytes,
                                      dst_pixels,
                                      dst_row_bytes,
                                      columns,
                                      rows,
                                      opacity);
}
#endif /* SDL_SVE2_INTRINSICS */

View File

@@ -30,6 +30,8 @@
void Blit8888to8888PixelAlphaSwizzleSVE2(SDL_BlitInfo *info);
void Blit8888to565PixelAlphaSwizzleSVE2(SDL_BlitInfo *info);
// Fast RGB565->RGB565 blit that blends with the surface alpha (info->a).
void Blit565to565SurfaceAlphaSVE2(SDL_BlitInfo *info);
// Returns the element count of an 8-bit-lane SVE vector multiplied by 8.
// NOTE(review): presumably the SVE vector width in bits - confirm.
size_t SDL_GetSVEVectorSize(void);
#endif /* SDL_SVE2_INTRINSICS */

View File

@@ -964,6 +964,23 @@ static inline svuint16_t sdl_sve_chn_blend_with_opacity(svuint16_t vSource,
return svlsr_n_u16_m(svptrue_b16(), vTarget, 8); // vTarget >> 8;
}
/*! \brief blend one channel vector with a scalar opacity:
 *         result = (vSource * hwOpacity + vTarget * (256 - hwOpacity)) >> 8
 *  \note the hwOpacity range [0, 0x100]
 *  \note all arithmetic is done in 16-bit lanes with every lane active
 *        NOTE(review): assumes channel values fit in 8 bits so the products
 *        cannot wrap - confirm against the unpack helper
 */
SDL_TARGETING("arch=armv8-a+sve2")
static inline svuint16_t sdl_sve_chn_blend_with_opacity_fast(svuint16_t vSource,
                                                             svuint16_t vTarget,
                                                             uint16_t hwOpacity)
{
    svbool_t vAllLanes = svptrue_b16();
    uint16_t hwTransparency = (uint16_t)(256 - hwOpacity);

    // weighted source contribution
    svuint16_t vBlend = svmul_n_u16_m(vAllLanes, vSource, hwOpacity);

    // accumulate the weighted target contribution on top of it
    vBlend = svmla_n_u16_m(vAllLanes, vBlend, vTarget, hwTransparency);

    // scale the sum back down: vBlend >> 8
    return svlsr_n_u16_m(vAllLanes, vBlend, 8);
}
/*! \note the Element range of vMask is [0, 0xFF]
* \note the hwOpacity range [0, 0x100]
*/