Implement fast 255 mult-div in SDL_blit_auto

This commit is contained in:
Isaac Aronson
2023-12-17 07:55:49 -06:00
committed by Sam Lantinga
parent 23d4e9ec13
commit b34faf732d
5 changed files with 1663 additions and 1083 deletions

View File

@@ -509,6 +509,13 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface);
x += x >> 8; \
dC = x >> 8; \
} while (0)
/*
 * Compute out = (sC * dC) / 255 (rounded down) without an integer division.
 *
 * sC and dC are expected to be 8-bit color channel values (0..255), so their
 * product fits in 16 bits. Adding 1 before folding in the high byte makes the
 * shift-based result exact over that whole range; without it every exact
 * multiple of 255 comes out one too low (255 * 255 -> 254, 255 * 1 -> 0).
 *
 * `out` may be the same lvalue as sC or dC: both inputs are read before `out`
 * is written. Arguments are parenthesized so expressions such as `255 - a`
 * can be passed directly.
 */
#define MULT_DIV_255(sC, dC, out)                               \
    do {                                                        \
        Uint16 mult_div_255_tmp = (Uint16)((sC) * (dC));        \
        mult_div_255_tmp += 0x1U;                               \
        mult_div_255_tmp += mult_div_255_tmp >> 8;              \
        (out) = mult_div_255_tmp >> 8;                          \
    } while (0)
/* Blend the RGB values of two pixels with an alpha value */
#define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB) \
do { \

View File

@@ -1164,7 +1164,7 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
}
while (height--) {
if (srcbpp == 4 && dstbpp == 4 && dstfmt->Ashift == 24 && dstfmt->Rshift == 16 && dstfmt->Gshift == 8 &&
/* if (srcbpp == 4 && dstbpp == 4 && dstfmt->Ashift == 24 && dstfmt->Rshift == 16 && dstfmt->Gshift == 8 &&
dstfmt->Bshift == 0) {
DUFFS_LOOP4(
{
@@ -1176,7 +1176,7 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
dst += dstbpp;
},
width);
} else {
} else { */
/* *INDENT-OFF* */ /* clang-format off */
DUFFS_LOOP4(
{
@@ -1193,7 +1193,7 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
/* *INDENT-ON* */ /* clang-format on */
src += srcskip;
dst += dstskip;
}
// }
}
if (freeFormat) {
SDL_DestroyPixelFormat(dstfmt);

File diff suppressed because it is too large Load Diff

View File

@@ -20,6 +20,7 @@
3. This notice may not be removed or altered from any source distribution.
*/
#include "SDL_internal.h"
#include "SDL_blit.h"
#if SDL_HAVE_BLIT_AUTO

View File

@@ -111,6 +111,7 @@ sub open_file {
3. This notice may not be removed or altered from any source distribution.
*/
#include "SDL_internal.h"
#include "SDL_blit.h"
#if SDL_HAVE_BLIT_AUTO
@@ -309,15 +310,15 @@ __EOF__
if ( $modulate ) {
print FILE <<__EOF__;
if (flags & SDL_COPY_MODULATE_COLOR) {
${s}R = (${s}R * modulateR) / 255;
${s}G = (${s}G * modulateG) / 255;
${s}B = (${s}B * modulateB) / 255;
MULT_DIV_255(${s}R, modulateR, ${s}R);
MULT_DIV_255(${s}G, modulateG, ${s}G);
MULT_DIV_255(${s}B, modulateB, ${s}B);
}
__EOF__
if (!$ignore_dst_alpha && !$is_modulateA_done) {
print FILE <<__EOF__;
if (flags & SDL_COPY_MODULATE_ALPHA) {
${s}A = (${s}A * modulateA) / 255;
MULT_DIV_255(${s}A, modulateA, ${s}A);
}
__EOF__
}
@@ -328,9 +329,9 @@ __EOF__
if (flags & (SDL_COPY_BLEND|SDL_COPY_ADD)) {
/* This goes away if we ever use premultiplied alpha */
if (${s}A < 255) {
${s}R = (${s}R * ${s}A) / 255;
${s}G = (${s}G * ${s}A) / 255;
${s}B = (${s}B * ${s}A) / 255;
MULT_DIV_255(${s}R, ${s}A, ${s}R);
MULT_DIV_255(${s}G, ${s}A, ${s}G);
MULT_DIV_255(${s}B, ${s}A, ${s}B);
}
}
__EOF__
@@ -347,9 +348,12 @@ __EOF__
__EOF__
} else {
print FILE <<__EOF__;
${d}R = ${s}R + ((255 - ${s}A) * ${d}R) / 255;
${d}G = ${s}G + ((255 - ${s}A) * ${d}G) / 255;
${d}B = ${s}B + ((255 - ${s}A) * ${d}B) / 255;
MULT_DIV_255((255 - ${s}A), ${d}R, ${d}R);
${d}R += ${s}R;
MULT_DIV_255((255 - ${s}A), ${d}G, ${d}G);
${d}G += ${s}G;
MULT_DIV_255((255 - ${s}A), ${d}B, ${d}B);
${d}B += ${s}B;
__EOF__
}
if ( $dst_has_alpha ) {
@@ -359,7 +363,8 @@ __EOF__
__EOF__
} else {
print FILE <<__EOF__;
${d}A = ${s}A + ((255 - ${s}A) * ${d}A) / 255;
MULT_DIV_255((255 - ${s}A), ${d}A, ${d}A);
${d}A += ${s}A;
__EOF__
}
}
@@ -372,23 +377,29 @@ __EOF__
${d}B = ${s}B + ${d}B; if (${d}B > 255) ${d}B = 255;
break;
case SDL_COPY_MOD:
${d}R = (${s}R * ${d}R) / 255;
${d}G = (${s}G * ${d}G) / 255;
${d}B = (${s}B * ${d}B) / 255;
MULT_DIV_255(${s}R, ${d}R, ${d}R);
MULT_DIV_255(${s}G, ${d}G, ${d}G);
MULT_DIV_255(${s}B, ${d}B, ${d}B);
break;
case SDL_COPY_MUL:
__EOF__
if ($A_is_const_FF) {
print FILE <<__EOF__;
${d}R = (${s}R * ${d}R) / 255;
${d}G = (${s}G * ${d}G) / 255;
${d}B = (${s}B * ${d}B) / 255;
MULT_DIV_255(${s}R, ${d}R, ${d}R);
MULT_DIV_255(${s}G, ${d}G, ${d}G);
MULT_DIV_255(${s}B, ${d}B, ${d}B);
__EOF__
} else {
print FILE <<__EOF__;
${d}R = ((${s}R * ${d}R) + (${d}R * (255 - ${s}A))) / 255; if (${d}R > 255) ${d}R = 255;
${d}G = ((${s}G * ${d}G) + (${d}G * (255 - ${s}A))) / 255; if (${d}G > 255) ${d}G = 255;
${d}B = ((${s}B * ${d}B) + (${d}B * (255 - ${s}A))) / 255; if (${d}B > 255) ${d}B = 255;
{
    /* MUL blend: dst = (src * dst + dst * (255 - srcA)) / 255, clamped to 255.
       Both products must be taken from the original dst value, so each one
       is scaled into its own temporary before dst is overwritten. */
    Uint32 tmp1, tmp2;

    MULT_DIV_255(${s}R, ${d}R, tmp1);
    MULT_DIV_255(${d}R, (255 - ${s}A), tmp2);
    ${d}R = tmp1 + tmp2; if (${d}R > 255) ${d}R = 255;
    MULT_DIV_255(${s}G, ${d}G, tmp1);
    MULT_DIV_255(${d}G, (255 - ${s}A), tmp2);
    ${d}G = tmp1 + tmp2; if (${d}G > 255) ${d}G = 255;
    MULT_DIV_255(${s}B, ${d}B, tmp1);
    MULT_DIV_255(${d}B, (255 - ${s}A), tmp2);
    ${d}B = tmp1 + tmp2; if (${d}B > 255) ${d}B = 255;
}
__EOF__
}