diff --git a/src/video/SDL_blit.h b/src/video/SDL_blit.h index 90588153ad..04525d7ee0 100644 --- a/src/video/SDL_blit.h +++ b/src/video/SDL_blit.h @@ -493,21 +493,30 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface); } \ } +/* Blend a single color channel or alpha value: dC = (sC*sA + dC*(255-sA)) / 255 without a division; dC must be an lvalue and arguments are evaluated more than once */ +#define ALPHA_BLEND_CHANNEL(sC, dC, sA) \ + do { \ + Uint16 blend_tmp_; \ + blend_tmp_ = (((sC) - (dC)) * (sA)) + (((dC) << 8) - (dC)); \ + blend_tmp_ += 0x1U; /* use 0x80 to round instead of floor */ \ + blend_tmp_ += blend_tmp_ >> 8; \ + (dC) = blend_tmp_ >> 8; \ + } while (0) /* Blend the RGB values of two pixels with an alpha value */ #define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB) \ do { \ - dR = (Uint8)((((int)(sR - dR) * (int)A) / 255) + dR); \ - dG = (Uint8)((((int)(sG - dG) * (int)A) / 255) + dG); \ - dB = (Uint8)((((int)(sB - dB) * (int)A) / 255) + dB); \ + ALPHA_BLEND_CHANNEL(sR, dR, A); \ + ALPHA_BLEND_CHANNEL(sG, dG, A); \ + ALPHA_BLEND_CHANNEL(sB, dB, A); \ } while (0) /* Blend the RGBA values of two pixels */ -#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA) \ - do { \ - dR = (Uint8)((((int)(sR - dR) * (int)sA) / 255) + dR); \ - dG = (Uint8)((((int)(sG - dG) * (int)sA) / 255) + dG); \ - dB = (Uint8)((((int)(sB - dB) * (int)sA) / 255) + dB); \ - dA = (Uint8)((int)sA + dA - ((int)sA * dA) / 255); \ +#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA) \ + do { \ + ALPHA_BLEND_CHANNEL(sR, dR, sA); \ + ALPHA_BLEND_CHANNEL(sG, dG, sA); \ + ALPHA_BLEND_CHANNEL(sB, dB, sA); \ + ALPHA_BLEND_CHANNEL(255, dA, sA); \ } while (0) /* This is a very useful loop for optimizing blitters */ diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c index 7f272fd090..3707ca891c 100644 --- a/src/video/SDL_blit_A.c +++ b/src/video/SDL_blit_A.c @@ -460,22 +460,24 @@ static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info) int dstskip = info->dst_skip >> 2; Uint32 s; Uint32 d; - Uint32 s1; - Uint32 d1; while (height--) { /* *INDENT-OFF* */ /* clang-format off */ DUFFS_LOOP4({ s = *srcp; d = *dstp; - s1 = s & 0xff00ff; - d1 = d & 0xff00ff; - d1 = (d1 + ((s1 - d1) * alpha >> 8)) 
- & 0xff00ff; - s &= 0xff00; - d &= 0xff00; - d = (d + ((s - d) * alpha >> 8)) & 0xff00; - *dstp = d1 | d | 0xff000000; + Uint8 sR = (s >> 16) & 0xFF; + Uint8 sG = (s >> 8) & 0xFF; + Uint8 sB = s & 0xFF; + Uint8 dR = (d >> 16) & 0xFF; + Uint8 dG = (d >> 8) & 0xFF; + Uint8 dB = d & 0xFF; + + ALPHA_BLEND_CHANNEL(sR, dR, alpha); + ALPHA_BLEND_CHANNEL(sG, dG, alpha); + ALPHA_BLEND_CHANNEL(sB, dB, alpha); + + *dstp = (dR << 16) | (dG << 8) | dB | 0xFF000000; ++srcp; ++dstp; }, width); @@ -950,97 +952,6 @@ static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info) } } -/* fast ARGB8888->RGB565 blending with pixel alpha */ -static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info) -{ - int width = info->dst_w; - int height = info->dst_h; - Uint32 *srcp = (Uint32 *)info->src; - int srcskip = info->src_skip >> 2; - Uint16 *dstp = (Uint16 *)info->dst; - int dstskip = info->dst_skip >> 1; - - while (height--) { - /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4({ - Uint32 s = *srcp; - unsigned alpha = s >> 27; /* downscale alpha to 5 bits */ - /* FIXME: Here we special-case opaque alpha since the - compositioning used (>>8 instead of /255) doesn't handle - it correctly. Also special-case alpha=0 for speed? - Benchmark this! 
*/ - if (alpha) { - if (alpha == (SDL_ALPHA_OPAQUE >> 3)) { - *dstp = (Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f)); - } else { - Uint32 d = *dstp; - /* - * convert source and destination to G0RAB65565 - * and blend all components at the same time - */ - s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800) - + (s >> 3 & 0x1f); - d = (d | d << 16) & 0x07e0f81f; - d += (s - d) * alpha >> 5; - d &= 0x07e0f81f; - *dstp = (Uint16)(d | d >> 16); - } - } - srcp++; - dstp++; - }, width); - /* *INDENT-ON* */ /* clang-format on */ - srcp += srcskip; - dstp += dstskip; - } -} - -/* fast ARGB8888->RGB555 blending with pixel alpha */ -static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info) -{ - int width = info->dst_w; - int height = info->dst_h; - Uint32 *srcp = (Uint32 *)info->src; - int srcskip = info->src_skip >> 2; - Uint16 *dstp = (Uint16 *)info->dst; - int dstskip = info->dst_skip >> 1; - - while (height--) { - /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4({ - unsigned alpha; - Uint32 s = *srcp; - alpha = s >> 27; /* downscale alpha to 5 bits */ - /* FIXME: Here we special-case opaque alpha since the - compositioning used (>>8 instead of /255) doesn't handle - it correctly. Also special-case alpha=0 for speed? - Benchmark this! 
*/ - if (alpha) { - if (alpha == (SDL_ALPHA_OPAQUE >> 3)) { - *dstp = (Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f)); - } else { - Uint32 d = *dstp; - /* - * convert source and destination to G0RAB65565 - * and blend all components at the same time - */ - s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00) - + (s >> 3 & 0x1f); - d = (d | d << 16) & 0x03e07c1f; - d += (s - d) * alpha >> 5; - d &= 0x03e07c1f; - *dstp = (Uint16)(d | d >> 16); - } - } - srcp++; - dstp++; - }, width); - /* *INDENT-ON* */ /* clang-format on */ - srcp += srcskip; - dstp += dstskip; - } -} - /* General (slow) N->N blending with per-surface alpha */ static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info) { @@ -1119,15 +1030,6 @@ static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info) } } -/* Accurate alpha blending with no division */ -static Uint8 AlphaBlendChannel(Uint8 sC, Uint8 dC, Uint8 sA) -{ - Uint16 x = ((sC - dC) * sA) + ((dC << 8) - dC); - x += 0x1U; // Use 0x80 to round instead of floor - x += x >> 8; - return x >> 8; -} - /* General (slow) N->N blending with pixel alpha */ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info) { @@ -1141,7 +1043,7 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info) SDL_PixelFormat *dstfmt = info->dst_fmt; int srcbpp; int dstbpp; - int freeFormat; + SDL_bool freeFormat; Uint32 Pixel; unsigned sR, sG, sB, sA; unsigned dR, dG, dB, dA; @@ -1149,7 +1051,7 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info) /* Set up some basic variables */ srcbpp = srcfmt->bytes_per_pixel; dstbpp = dstfmt->bytes_per_pixel; - freeFormat = 0; + freeFormat = SDL_FALSE; #ifdef SDL_AVX2_INTRINSICS if (srcbpp == 4 && dstbpp == 4 && width >= 4 && SDL_HasAVX2()) { @@ -1167,7 +1069,7 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info) /* Handle case where bad input sent */ if (dstfmt->Ashift == 0 && dstfmt->Ashift == dstfmt->Bshift) { dstfmt = SDL_CreatePixelFormat(SDL_PIXELFORMAT_ARGB8888); - freeFormat = 1; + freeFormat = SDL_TRUE; } while (height--) { @@ -1177,10 
+1079,7 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info) DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA); if (sA) { DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA); - dR = AlphaBlendChannel(sR, dR, sA); - dG = AlphaBlendChannel(sG, dG, sA); - dB = AlphaBlendChannel(sB, dB, sA); - dA = AlphaBlendChannel(255, dA, sA); + ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA); ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); } src += srcbpp; @@ -1214,13 +1113,6 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface) } case 2: - if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 && sf->Gmask == 0xff00 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { - if (df->Gmask == 0x7e0) { - return BlitARGBto565PixelAlpha; - } else if (df->Gmask == 0x3e0) { - return BlitARGBto555PixelAlpha; - } - } return BlitNtoNPixelAlpha; case 4: diff --git a/src/video/SDL_blit_A_avx2.c b/src/video/SDL_blit_A_avx2.c index 8f4b3f3561..04b5851c2d 100644 --- a/src/video/SDL_blit_A_avx2.c +++ b/src/video/SDL_blit_A_avx2.c @@ -113,11 +113,11 @@ void SDL_TARGETING("avx2") BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info) SDL_PixelFormat *dstfmt = info->dst_fmt; int chunks = width / 8; - int free_format = 0; + SDL_bool free_format = SDL_FALSE; /* Handle case when passed invalid format, assume ARGB destination */ if (dstfmt->Ashift == 0 && dstfmt->Ashift == dstfmt->Bshift) { dstfmt = SDL_CreatePixelFormat(SDL_PIXELFORMAT_ARGB8888); - free_format = 1; + free_format = SDL_TRUE; } const __m256i shift_mask = GetSDL_PixelFormatShuffleMask_AVX2(srcfmt, dstfmt); const __m256i splat_mask = GetSDL_PixelFormatAlphaSplatMask_AVX2(dstfmt); diff --git a/src/video/SDL_blit_A_sse4_1.c b/src/video/SDL_blit_A_sse4_1.c index e243561d8b..fae70c6cb9 100644 --- a/src/video/SDL_blit_A_sse4_1.c +++ b/src/video/SDL_blit_A_sse4_1.c @@ -126,11 +126,11 @@ void SDL_TARGETING("sse4.1") BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) { SDL_PixelFormat *dstfmt = 
info->dst_fmt; const int chunks = width / 4; - int free_format = 0; + SDL_bool free_format = SDL_FALSE; /* Handle case when passed invalid format, assume ARGB destination */ if (dstfmt->Ashift == 0 && dstfmt->Ashift == dstfmt->Bshift) { dstfmt = SDL_CreatePixelFormat(SDL_PIXELFORMAT_ARGB8888); - free_format = 1; + free_format = SDL_TRUE; } const __m128i shift_mask = GetSDL_PixelFormatShuffleMask_SSE4_1(srcfmt, dstfmt); const __m128i splat_mask = GetSDL_PixelFormatAlphaSplatMask_SSE4_1(dstfmt);