mirror of
https://github.com/libsdl-org/SDL.git
synced 2026-05-25 14:28:34 +00:00
adds Blit565to565SurfaceAlphaSVE2
This commit is contained in:
committed by
Sam Lantinga
parent
e07cfdff2c
commit
508450e9c0
@@ -1570,6 +1570,11 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
|
||||
if (SDL_HasMMX()) {
|
||||
return Blit565to565SurfaceAlphaMMX;
|
||||
} else
|
||||
#endif
|
||||
#ifdef SDL_SVE2_INTRINSICS
|
||||
if (SDL_HasSVE2()) {
|
||||
return Blit565to565SurfaceAlphaSVE2;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
return Blit565to565SurfaceAlpha;
|
||||
|
||||
@@ -86,4 +86,101 @@ size_t SDL_GetSVEVectorSize(void)
|
||||
return svlen(svundef_u8()) * 8;
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------------*
|
||||
* RGB565 Blend with Surface Alpha *
|
||||
*-----------------------------------------------------------------------------*/
|
||||
/*
 * Blend one row of RGB565 pixels from phwSource onto phwTarget, in place.
 *
 * phwSource  row of source pixels (only read here)
 * phwTarget  row of destination pixels (read, blended, written back)
 * uStride    count handed to sdl_sve_stride_loop_rgb16; the caller in this
 *            file passes the row width in pixels
 * hwOpacity  source weight forwarded to sdl_sve_chn_blend_with_opacity_fast
 *
 * NOTE(review): vTailPred, sve_iteration_advance, sve_source_u16 and
 * sve_target_u16 are introduced by the sdl_sve_* macros, which are defined
 * elsewhere; their exact semantics are not visible from here.
 */
SDL_TARGETING("arch=armv8-a+sve2")
ARM_NONNULL(1, 2)
static inline void sdl_sve_rgb565_stride_blend_with_opacity(uint16_t *SDL_RESTRICT phwSource,
                                                            uint16_t *SDL_RESTRICT phwTarget,
                                                            size_t uStride,
                                                            uint16_t hwOpacity)
{
    sdl_sve_stride_loop_rgb16(uStride, vTailPred)
    {
        /* fetch the packed 16-bit pixels selected by the current predicate */
        svuint16_t vSrcPacked = svld1_u16(vTailPred, phwSource);
        svuint16_t vDstPacked = svld1_u16(vTailPred, phwTarget);

        /* split each packed pixel vector into a three-vector channel tuple */
        svuint16x3_t vSrcChannels = sdl_sve_rgb565_unpack(vSrcPacked);
        svuint16x3_t vDstChannels = sdl_sve_rgb565_unpack(vDstPacked);

        /* blend channel by channel; the result lands in vDstChannels */
        sdl_sve_pixel_ccc_foreach_chn(
            vSrcChannels,
            vDstChannels,
            {
                sve_target_u16 = sdl_sve_chn_blend_with_opacity_fast(
                    sve_source_u16, sve_target_u16, hwOpacity);
            });

        /* repack and write back under the same predicate used for the loads */
        svuint16_t vBlended = sdl_sve_rgb565_pack(vDstChannels);
        svst1_u16(vTailPred, phwTarget, vBlended);

        phwSource += sve_iteration_advance;
        phwTarget += sve_iteration_advance;
    }
}
|
||||
|
||||
/*
 * Blend a rectangle of RGB565 pixels from pchSource onto pchTarget, row by row.
 *
 * pchSource      first byte of the first source row (must be 2-byte aligned)
 * uSourceStride  byte distance between the starts of consecutive source rows
 * pchTarget      first byte of the first target row (must be 2-byte aligned)
 * uTargetStride  byte distance between the starts of consecutive target rows
 * nWidth         pixels per row; values <= 0 make the call a no-op
 * nHeight        number of rows; values <= 0 make the call a no-op
 * hwOpacity      source weight; 255 is promoted to 256 before blending
 */
SDL_TARGETING("arch=armv8-a+sve2")
ARM_NONNULL(1, 3)
static inline void sdl_sve_rgb565_blend_with_opacity(uint8_t *SDL_RESTRICT pchSource,
                                                     size_t uSourceStride,
                                                     uint8_t *SDL_RESTRICT pchTarget,
                                                     size_t uTargetStride,
                                                     int nWidth,
                                                     int nHeight,
                                                     uint16_t hwOpacity)
{
    /*
     * Nothing to draw for an empty or negative rectangle. Without this guard a
     * negative width would be converted to an enormous size_t row length and a
     * negative height would keep the row loop running past the surface.
     */
    if (nWidth <= 0 || nHeight <= 0) {
        return;
    }

    /* promote 255 to 256 so the value reaches the top of the [0, 0x100] range
       documented for the per-channel blend helper */
    hwOpacity += hwOpacity == 255;

    /* rows are accessed as uint16_t, so both base pointers must be 2-byte aligned */
    assert(0 == ((uintptr_t)pchSource & 0x01));
    assert(0 == ((uintptr_t)pchTarget & 0x01));

    for (int nRow = 0; nRow < nHeight; nRow++) {
        sdl_sve_rgb565_stride_blend_with_opacity((uint16_t *)pchSource,
                                                 (uint16_t *)pchTarget,
                                                 (size_t)nWidth,
                                                 hwOpacity);

        /* strides are in bytes, hence the byte-typed pointers */
        pchSource += uSourceStride;
        pchTarget += uTargetStride;
    }
}
|
||||
|
||||
// fast RGB565->RGB565 blending with surface alpha
|
||||
SDL_TARGETING("arch=armv8-a+sve2")
|
||||
void Blit565to565SurfaceAlphaSVE2(SDL_BlitInfo *info)
|
||||
{
|
||||
uint16_t alpha = info->a;
|
||||
|
||||
int width = info->dst_w;
|
||||
int height = info->dst_h;
|
||||
uint8_t *src = info->src;
|
||||
int srcskip = info->src_skip;
|
||||
uint8_t *dst = info->dst;
|
||||
int dstskip = info->dst_skip;
|
||||
|
||||
const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
|
||||
const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
|
||||
|
||||
// Set up some basic variables
|
||||
int srcbpp = srcfmt->bytes_per_pixel;
|
||||
int dstbpp = dstfmt->bytes_per_pixel;
|
||||
|
||||
assert(srcbpp == 2);
|
||||
assert(dstbpp == 2);
|
||||
|
||||
int srcstride = srcskip + srcbpp * width;
|
||||
int dststride = dstskip + dstbpp * width;
|
||||
|
||||
sdl_sve_rgb565_blend_with_opacity(src,
|
||||
srcstride,
|
||||
dst,
|
||||
dststride,
|
||||
width,
|
||||
height,
|
||||
alpha);
|
||||
}
|
||||
|
||||
#endif /* SDL_SVE2_INTRINSICS */
|
||||
@@ -30,6 +30,8 @@
|
||||
void Blit8888to8888PixelAlphaSwizzleSVE2(SDL_BlitInfo *info);
|
||||
void Blit8888to565PixelAlphaSwizzleSVE2(SDL_BlitInfo *info);
|
||||
|
||||
void Blit565to565SurfaceAlphaSVE2(SDL_BlitInfo *info);
|
||||
|
||||
size_t SDL_GetSVEVectorSize(void);
|
||||
|
||||
#endif /* SDL_SVE2_INTRINSICS */
|
||||
|
||||
@@ -964,6 +964,23 @@ static inline svuint16_t sdl_sve_chn_blend_with_opacity(svuint16_t vSource,
|
||||
return svlsr_n_u16_m(svptrue_b16(), vTarget, 8); // vTarget >> 8;
|
||||
}
|
||||
|
||||
/*! \note the hwOpacity range [0, 0x100]
|
||||
*/
|
||||
SDL_TARGETING("arch=armv8-a+sve2")
|
||||
static inline svuint16_t sdl_sve_chn_blend_with_opacity_fast(svuint16_t vSource,
|
||||
svuint16_t vTarget,
|
||||
uint16_t hwOpacity)
|
||||
{
|
||||
// vTarget = vSource * vMask + vTarget * (255 - vMask);
|
||||
svuint16_t vTemp0 = svmul_n_u16_m(svptrue_b16(), vSource, hwOpacity);
|
||||
vTemp0 = svmla_n_u16_m(svptrue_b16(),
|
||||
vTemp0,
|
||||
vTarget,
|
||||
256 - hwOpacity);
|
||||
|
||||
return svlsr_n_u16_m(svptrue_b16(), vTemp0, 8); // vTarget >> 8;
|
||||
}
|
||||
|
||||
/*! \note the Element range of vMask is [0, 0xFF]
|
||||
* \note the hwOpacity range [0, 0x100]
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user