Added hardware-accelerated support for palettized textures

Supported backends: direct3d, direct3d11, direct3d12, gpu, metal, opengl, opengles2
This commit is contained in:
Sam Lantinga
2025-09-27 22:55:21 -07:00
parent b82b1f416f
commit e2fe23ddab
51 changed files with 21216 additions and 15037 deletions

View File

@@ -0,0 +1,92 @@
#if 0
//
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
//
// Parameters:
//
// sampler2D image;
// sampler1D palette;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// image s0 1
// palette s1 1
//
ps_2_0
def c0, 0.99609375, 0.001953125, 0, 0
dcl t0.xy
dcl v0
dcl_2d s0
dcl_2d s1
texld r0, t0, s0
mad r0.xy, r0.x, c0.x, c0.y
texld r0, r0, s1
mul r0, r0, v0
mov oC0, r0
// approximately 5 instruction slots used (2 texture, 3 arithmetic)
#endif
// Compiled ps_2_0 bytecode for the palette lookup pixel shader. The matching
// disassembly is listed in the "#if 0" block above. This table is emitted by
// the HLSL shader compiler; regenerate it from the .hlsl source instead of
// editing the bytes by hand.
const BYTE g_ps20_main[] =
{
0, 2, 255, 255, 254, 255,
42, 0, 67, 84, 65, 66,
28, 0, 0, 0, 123, 0,
0, 0, 0, 2, 255, 255,
2, 0, 0, 0, 28, 0,
0, 0, 0, 1, 0, 0,
116, 0, 0, 0, 68, 0,
0, 0, 3, 0, 0, 0,
1, 0, 0, 0, 76, 0,
0, 0, 0, 0, 0, 0,
92, 0, 0, 0, 3, 0,
1, 0, 1, 0, 0, 0,
100, 0, 0, 0, 0, 0,
0, 0, 105, 109, 97, 103,
101, 0, 171, 171, 4, 0,
12, 0, 1, 0, 1, 0,
1, 0, 0, 0, 0, 0,
0, 0, 112, 97, 108, 101,
116, 116, 101, 0, 4, 0,
11, 0, 1, 0, 1, 0,
1, 0, 0, 0, 0, 0,
0, 0, 112, 115, 95, 50,
95, 48, 0, 77, 105, 99,
114, 111, 115, 111, 102, 116,
32, 40, 82, 41, 32, 72,
76, 83, 76, 32, 83, 104,
97, 100, 101, 114, 32, 67,
111, 109, 112, 105, 108, 101,
114, 32, 49, 48, 46, 49,
0, 171, 81, 0, 0, 5,
0, 0, 15, 160, 0, 0,
127, 63, 0, 0, 0, 59,
0, 0, 0, 0, 0, 0,
0, 0, 31, 0, 0, 2,
0, 0, 0, 128, 0, 0,
3, 176, 31, 0, 0, 2,
0, 0, 0, 128, 0, 0,
15, 144, 31, 0, 0, 2,
0, 0, 0, 144, 0, 8,
15, 160, 31, 0, 0, 2,
0, 0, 0, 144, 1, 8,
15, 160, 66, 0, 0, 3,
0, 0, 15, 128, 0, 0,
228, 176, 0, 8, 228, 160,
4, 0, 0, 4, 0, 0,
3, 128, 0, 0, 0, 128,
0, 0, 0, 160, 0, 0,
85, 160, 66, 0, 0, 3,
0, 0, 15, 128, 0, 0,
228, 128, 1, 8, 228, 160,
5, 0, 0, 3, 0, 0,
15, 128, 0, 0, 228, 128,
0, 0, 228, 144, 1, 0,
0, 2, 0, 8, 15, 128,
0, 0, 228, 128, 255, 255,
0, 0
};

View File

@@ -0,0 +1,19 @@
// Indexed source image; main() reads the palette index from its red channel.
uniform sampler2D image;
// Palette lookup table; main() samples it with tex1D() using the remapped index.
uniform sampler1D palette;
// Per-pixel inputs received by main().
struct PixelShaderInput
{
// Position semantic; not read by main().
float4 pos : SV_POSITION;
// Coordinate used to sample the indexed image.
float2 tex : TEXCOORD0;
// Color that main() multiplies into the palette lookup result.
float4 color : COLOR0;
};
// Palette lookup pixel shader: fetches the index stored in the red channel of
// `image`, resolves it through `palette`, and modulates the resulting color by
// the input color.
float4 main(PixelShaderInput input) : SV_TARGET
{
    // Palette index as sampled from the image's red channel.
    float index = tex2D(image, input.tex).r;

    // Rescale by 255/256 and offset by half of 1/256 before the lookup.
    // NOTE(review): presumably this maps a normalized 8-bit index v/255 to the
    // center of entry v in a 256-entry palette, (v + 0.5) / 256 -- confirm
    // against the palette texture size used by the renderer.
    float paletteCoord = index * (255. / 256) + (0.5 / 256);

    float4 paletteColor = tex1D(palette, paletteCoord);
    return paletteColor * input.color;
}

View File

@@ -51,114 +51,114 @@
const BYTE g_ps20_main[] =
{
0, 2, 255, 255, 254, 255,
97, 0, 67, 84, 65, 66,
28, 0, 0, 0, 87, 1,
0, 0, 0, 2, 255, 255,
7, 0, 0, 0, 28, 0,
0, 0, 0, 1, 0, 0,
80, 1, 0, 0, 168, 0,
0, 0, 2, 0, 3, 0,
1, 0, 0, 0, 176, 0,
0, 0, 0, 0, 0, 0,
192, 0, 0, 0, 2, 0,
2, 0, 1, 0, 0, 0,
176, 0, 0, 0, 0, 0,
0, 0, 199, 0, 0, 0,
2, 0, 1, 0, 1, 0,
0, 0, 176, 0, 0, 0,
0, 0, 0, 0, 206, 0,
0, 0, 2, 0, 0, 0,
1, 0, 0, 0, 176, 0,
0, 0, 0, 0, 0, 0,
214, 0, 0, 0, 3, 0,
1, 0, 1, 0, 0, 0,
240, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0,
3, 0, 2, 0, 1, 0,
0, 0, 24, 1, 0, 0,
0, 0, 0, 0, 40, 1,
0, 0, 3, 0, 0, 0,
1, 0, 0, 0, 64, 1,
0, 0, 0, 0, 0, 0,
66, 99, 111, 101, 102, 102,
0, 171, 1, 0, 3, 0,
1, 0, 4, 0, 1, 0,
0, 0, 0, 0, 0, 0,
71, 99, 111, 101, 102, 102,
0, 82, 99, 111, 101, 102,
102, 0, 89, 111, 102, 102,
115, 101, 116, 0, 116, 104,
101, 83, 97, 109, 112, 108,
101, 114, 43, 116, 104, 101,
84, 101, 120, 116, 117, 114,
101, 85, 0, 171, 171, 171,
4, 0, 7, 0, 1, 0,
4, 0, 1, 0, 0, 0,
0, 0, 0, 0, 116, 104,
101, 83, 97, 109, 112, 108,
101, 114, 43, 116, 104, 101,
84, 101, 120, 116, 117, 114,
101, 86, 0, 171, 4, 0,
7, 0, 1, 0, 4, 0,
1, 0, 0, 0, 0, 0,
0, 0, 116, 104, 101, 83,
97, 109, 112, 108, 101, 114,
43, 116, 104, 101, 84, 101,
120, 116, 117, 114, 101, 89,
0, 171, 4, 0, 7, 0,
1, 0, 4, 0, 1, 0,
0, 0, 0, 0, 0, 0,
112, 115, 95, 50, 95, 48,
0, 77, 105, 99, 114, 111,
115, 111, 102, 116, 32, 40,
82, 41, 32, 72, 76, 83,
76, 32, 83, 104, 97, 100,
101, 114, 32, 67, 111, 109,
112, 105, 108, 101, 114, 32,
49, 48, 46, 49, 0, 171,
81, 0, 0, 5, 4, 0,
15, 160, 0, 0, 128, 63,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
31, 0, 0, 2, 0, 0,
0, 128, 0, 0, 3, 176,
31, 0, 0, 2, 0, 0,
0, 128, 0, 0, 15, 144,
31, 0, 0, 2, 0, 0,
0, 144, 0, 8, 15, 160,
31, 0, 0, 2, 0, 0,
0, 144, 1, 8, 15, 160,
31, 0, 0, 2, 0, 0,
0, 144, 2, 8, 15, 160,
66, 0, 0, 3, 0, 0,
15, 128, 0, 0, 228, 176,
0, 8, 228, 160, 66, 0,
0, 3, 1, 0, 15, 128,
0, 0, 228, 176, 1, 8,
228, 160, 66, 0, 0, 3,
2, 0, 15, 128, 0, 0,
228, 176, 2, 8, 228, 160,
1, 0, 0, 2, 0, 0,
2, 128, 1, 0, 0, 128,
1, 0, 0, 2, 0, 0,
4, 128, 2, 0, 0, 128,
2, 0, 0, 3, 0, 0,
7, 128, 0, 0, 228, 128,
0, 0, 228, 160, 8, 0,
0, 3, 1, 0, 1, 128,
0, 0, 228, 128, 1, 0,
228, 160, 8, 0, 0, 3,
1, 0, 2, 128, 0, 0,
228, 128, 2, 0, 228, 160,
8, 0, 0, 3, 1, 0,
4, 128, 0, 0, 228, 128,
3, 0, 228, 160, 1, 0,
0, 2, 1, 0, 8, 128,
4, 0, 0, 160, 5, 0,
0, 3, 0, 0, 15, 128,
1, 0, 228, 128, 0, 0,
228, 144, 1, 0, 0, 2,
0, 8, 15, 128, 0, 0,
0, 2, 255, 255, 254, 255,
97, 0, 67, 84, 65, 66,
28, 0, 0, 0, 87, 1,
0, 0, 0, 2, 255, 255,
7, 0, 0, 0, 28, 0,
0, 0, 0, 1, 0, 0,
80, 1, 0, 0, 168, 0,
0, 0, 2, 0, 3, 0,
1, 0, 0, 0, 176, 0,
0, 0, 0, 0, 0, 0,
192, 0, 0, 0, 2, 0,
2, 0, 1, 0, 0, 0,
176, 0, 0, 0, 0, 0,
0, 0, 199, 0, 0, 0,
2, 0, 1, 0, 1, 0,
0, 0, 176, 0, 0, 0,
0, 0, 0, 0, 206, 0,
0, 0, 2, 0, 0, 0,
1, 0, 0, 0, 176, 0,
0, 0, 0, 0, 0, 0,
214, 0, 0, 0, 3, 0,
1, 0, 1, 0, 0, 0,
240, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0,
3, 0, 2, 0, 1, 0,
0, 0, 24, 1, 0, 0,
0, 0, 0, 0, 40, 1,
0, 0, 3, 0, 0, 0,
1, 0, 0, 0, 64, 1,
0, 0, 0, 0, 0, 0,
66, 99, 111, 101, 102, 102,
0, 171, 1, 0, 3, 0,
1, 0, 4, 0, 1, 0,
0, 0, 0, 0, 0, 0,
71, 99, 111, 101, 102, 102,
0, 82, 99, 111, 101, 102,
102, 0, 89, 111, 102, 102,
115, 101, 116, 0, 116, 104,
101, 83, 97, 109, 112, 108,
101, 114, 43, 116, 104, 101,
84, 101, 120, 116, 117, 114,
101, 85, 0, 171, 171, 171,
4, 0, 7, 0, 1, 0,
4, 0, 1, 0, 0, 0,
0, 0, 0, 0, 116, 104,
101, 83, 97, 109, 112, 108,
101, 114, 43, 116, 104, 101,
84, 101, 120, 116, 117, 114,
101, 86, 0, 171, 4, 0,
7, 0, 1, 0, 4, 0,
1, 0, 0, 0, 0, 0,
0, 0, 116, 104, 101, 83,
97, 109, 112, 108, 101, 114,
43, 116, 104, 101, 84, 101,
120, 116, 117, 114, 101, 89,
0, 171, 4, 0, 7, 0,
1, 0, 4, 0, 1, 0,
0, 0, 0, 0, 0, 0,
112, 115, 95, 50, 95, 48,
0, 77, 105, 99, 114, 111,
115, 111, 102, 116, 32, 40,
82, 41, 32, 72, 76, 83,
76, 32, 83, 104, 97, 100,
101, 114, 32, 67, 111, 109,
112, 105, 108, 101, 114, 32,
49, 48, 46, 49, 0, 171,
81, 0, 0, 5, 4, 0,
15, 160, 0, 0, 128, 63,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
31, 0, 0, 2, 0, 0,
0, 128, 0, 0, 3, 176,
31, 0, 0, 2, 0, 0,
0, 128, 0, 0, 15, 144,
31, 0, 0, 2, 0, 0,
0, 144, 0, 8, 15, 160,
31, 0, 0, 2, 0, 0,
0, 144, 1, 8, 15, 160,
31, 0, 0, 2, 0, 0,
0, 144, 2, 8, 15, 160,
66, 0, 0, 3, 0, 0,
15, 128, 0, 0, 228, 176,
0, 8, 228, 160, 66, 0,
0, 3, 1, 0, 15, 128,
0, 0, 228, 176, 1, 8,
228, 160, 66, 0, 0, 3,
2, 0, 15, 128, 0, 0,
228, 176, 2, 8, 228, 160,
1, 0, 0, 2, 0, 0,
2, 128, 1, 0, 0, 128,
1, 0, 0, 2, 0, 0,
4, 128, 2, 0, 0, 128,
2, 0, 0, 3, 0, 0,
7, 128, 0, 0, 228, 128,
0, 0, 228, 160, 8, 0,
0, 3, 1, 0, 1, 128,
0, 0, 228, 128, 1, 0,
228, 160, 8, 0, 0, 3,
1, 0, 2, 128, 0, 0,
228, 128, 2, 0, 228, 160,
8, 0, 0, 3, 1, 0,
4, 128, 0, 0, 228, 128,
3, 0, 228, 160, 1, 0,
0, 2, 1, 0, 8, 128,
4, 0, 0, 160, 5, 0,
0, 3, 0, 0, 15, 128,
1, 0, 228, 128, 0, 0,
228, 144, 1, 0, 0, 2,
0, 8, 15, 128, 0, 0,
228, 128, 255, 255, 0, 0
};

View File

@@ -90,6 +90,7 @@ typedef struct
typedef struct
{
D3D_TextureRep texture;
D3D_TextureRep palette;
D3D9_Shader shader;
const float *shader_params;
@@ -198,6 +199,7 @@ static D3DFORMAT PixelFormatToD3DFMT(Uint32 format)
return D3DFMT_X8R8G8B8;
case SDL_PIXELFORMAT_ARGB8888:
return D3DFMT_A8R8G8B8;
case SDL_PIXELFORMAT_INDEX8:
case SDL_PIXELFORMAT_YV12:
case SDL_PIXELFORMAT_IYUV:
case SDL_PIXELFORMAT_NV12:
@@ -552,6 +554,13 @@ static bool D3D_CreateTexture(SDL_Renderer *renderer, SDL_Texture *texture, SDL_
if (!D3D_CreateTextureRep(data->device, &texturedata->texture, usage, texture->format, PixelFormatToD3DFMT(texture->format), texture->w, texture->h)) {
return false;
}
if (texture->format == SDL_PIXELFORMAT_INDEX8) {
if (!D3D_CreateTextureRep(data->device, &texturedata->palette, usage, SDL_PIXELFORMAT_ARGB8888, D3DFMT_A8R8G8B8, 256, 1)) {
return false;
}
texturedata->shader = SHADER_PALETTE;
}
#ifdef SDL_HAVE_YUV
if (texture->format == SDL_PIXELFORMAT_YV12 ||
texture->format == SDL_PIXELFORMAT_IYUV) {
@@ -587,6 +596,12 @@ static bool D3D_RecreateTexture(SDL_Renderer *renderer, SDL_Texture *texture)
if (!D3D_RecreateTextureRep(data->device, &texturedata->texture)) {
return false;
}
if (texture->format == SDL_PIXELFORMAT_INDEX8) {
if (!D3D_RecreateTextureRep(data->device, &texturedata->palette)) {
return false;
}
texture->palette_version = 0;
}
#ifdef SDL_HAVE_YUV
if (texturedata->yuv) {
if (!D3D_RecreateTextureRep(data->device, &texturedata->utexture)) {
@@ -601,6 +616,24 @@ static bool D3D_RecreateTexture(SDL_Renderer *renderer, SDL_Texture *texture)
return true;
}
// Uploads the colors of texture->palette into the texture's palette lookup
// texture as a 256x1 row of packed 32-bit ARGB values.
//
// renderer: renderer whose internal data supplies the Direct3D device.
// texture:  texture whose palette is uploaded; texture->palette is
//           dereferenced unconditionally, so the caller must have attached one.
//
// Returns the result of D3D_UpdateTextureRep(), or the value of SDL_SetError()
// when the texture has no backend data.
static bool D3D_UpdateTexturePalette(SDL_Renderer *renderer, SDL_Texture *texture)
{
    D3D_RenderData *data = (D3D_RenderData *)renderer->internal;
    D3D_TextureData *texturedata = (D3D_TextureData *)texture->internal;
    const SDL_Color *colors = texture->palette->colors;
    int ncolors = texture->palette->ncolors;
    // Zero-filled so that entries past ncolors are uploaded as a defined value
    // (transparent black) rather than uninitialized stack contents.
    Uint32 palette[256] = { 0 };

    if (!texturedata) {
        return SDL_SetError("Texture is not currently available");
    }

    // The staging buffer holds exactly 256 entries; never write past it, even
    // if the palette reports more colors.
    if (ncolors > (int)SDL_arraysize(palette)) {
        ncolors = (int)SDL_arraysize(palette);
    }

    for (int i = 0; i < ncolors; ++i) {
        // Widen each channel before shifting: an 8-bit channel promotes to a
        // signed int, and shifting alpha >= 128 left by 24 would overflow it.
        palette[i] = ((Uint32)colors[i].a << 24) |
                     ((Uint32)colors[i].r << 16) |
                     ((Uint32)colors[i].g << 8) |
                     (Uint32)colors[i].b;
    }

    // Always upload the full 256-entry row; the pitch is the size of that row.
    return D3D_UpdateTextureRep(data->device, &texturedata->palette, 0, 0, 256, 1, palette, sizeof(palette));
}
static bool D3D_UpdateTexture(SDL_Renderer *renderer, SDL_Texture *texture,
const SDL_Rect *rect, const void *pixels, int pitch)
{
@@ -980,6 +1013,11 @@ static bool SetupTextureState(D3D_RenderData *data, SDL_Texture *texture, D3D9_S
if (!BindTextureRep(data->device, &texturedata->texture, 0)) {
return false;
}
if (texture->format == SDL_PIXELFORMAT_INDEX8) {
if (!BindTextureRep(data->device, &texturedata->palette, 1)) {
return false;
}
}
#ifdef SDL_HAVE_YUV
if (texturedata->yuv) {
if (!BindTextureRep(data->device, &texturedata->utexture, 1)) {
@@ -999,10 +1037,8 @@ static bool SetDrawState(D3D_RenderData *data, const SDL_RenderCommand *cmd)
const SDL_BlendMode blend = cmd->data.draw.blend;
if (texture != data->drawstate.texture) {
#ifdef SDL_HAVE_YUV
D3D_TextureData *oldtexturedata = data->drawstate.texture ? (D3D_TextureData *)data->drawstate.texture->internal : NULL;
D3D_TextureData *newtexturedata = texture ? (D3D_TextureData *)texture->internal : NULL;
#endif
D3D9_Shader shader = SHADER_NONE;
const float *shader_params = NULL;
@@ -1010,6 +1046,10 @@ static bool SetDrawState(D3D_RenderData *data, const SDL_RenderCommand *cmd)
if (!texture) {
IDirect3DDevice9_SetTexture(data->device, 0, NULL);
}
if ((!newtexturedata || (texture->format != SDL_PIXELFORMAT_INDEX8)) &&
(oldtexturedata && (data->drawstate.texture->format == SDL_PIXELFORMAT_INDEX8))) {
IDirect3DDevice9_SetTexture(data->device, 1, NULL);
}
#ifdef SDL_HAVE_YUV
if ((!newtexturedata || !newtexturedata->yuv) && (oldtexturedata && oldtexturedata->yuv)) {
IDirect3DDevice9_SetTexture(data->device, 1, NULL);
@@ -1046,6 +1086,9 @@ static bool SetDrawState(D3D_RenderData *data, const SDL_RenderCommand *cmd)
D3D_TextureData *texturedata = (D3D_TextureData *)texture->internal;
if (texturedata) {
UpdateDirtyTexture(data->device, &texturedata->texture);
if (texture->format == SDL_PIXELFORMAT_INDEX8) {
UpdateDirtyTexture(data->device, &texturedata->palette);
}
#ifdef SDL_HAVE_YUV
if (texturedata->yuv) {
UpdateDirtyTexture(data->device, &texturedata->utexture);
@@ -1445,8 +1488,11 @@ static void D3D_DestroyTexture(SDL_Renderer *renderer, SDL_Texture *texture)
renderdata->drawstate.shader_params = NULL;
IDirect3DDevice9_SetPixelShader(renderdata->device, NULL);
IDirect3DDevice9_SetTexture(renderdata->device, 0, NULL);
if (texture->format == SDL_PIXELFORMAT_INDEX8) {
IDirect3DDevice9_SetTexture(renderdata->device, 1, NULL);
}
#ifdef SDL_HAVE_YUV
if (data->yuv) {
if (data && data->yuv) {
IDirect3DDevice9_SetTexture(renderdata->device, 1, NULL);
IDirect3DDevice9_SetTexture(renderdata->device, 2, NULL);
}
@@ -1483,14 +1529,12 @@ static void D3D_DestroyRenderer(SDL_Renderer *renderer)
IDirect3DSurface9_Release(data->currentRenderTarget);
data->currentRenderTarget = NULL;
}
#ifdef SDL_HAVE_YUV
for (i = 0; i < SDL_arraysize(data->shaders); ++i) {
if (data->shaders[i]) {
IDirect3DPixelShader9_Release(data->shaders[i]);
data->shaders[i] = NULL;
}
}
#endif
// Release all vertex buffers
for (i = 0; i < SDL_arraysize(data->vertexBuffers); ++i) {
if (data->vertexBuffers[i]) {
@@ -1668,6 +1712,7 @@ static bool D3D_CreateRenderer(SDL_Renderer *renderer, SDL_Window *window, SDL_P
renderer->WindowEvent = D3D_WindowEvent;
renderer->SupportsBlendMode = D3D_SupportsBlendMode;
renderer->CreateTexture = D3D_CreateTexture;
renderer->UpdateTexturePalette = D3D_UpdateTexturePalette;
renderer->UpdateTexture = D3D_UpdateTexture;
#ifdef SDL_HAVE_YUV
renderer->UpdateTextureYUV = D3D_UpdateTextureYUV;
@@ -1771,19 +1816,19 @@ static bool D3D_CreateRenderer(SDL_Renderer *renderer, SDL_Window *window, SDL_P
// Set up parameters for rendering
D3D_InitRenderState(data);
for (int i = SHADER_NONE + 1; i < SDL_arraysize(data->shaders); ++i) {
result = D3D9_CreatePixelShader(data->device, (D3D9_Shader)i, &data->shaders[i]);
if (FAILED(result)) {
D3D_SetError("CreatePixelShader()", result);
}
}
if (caps.MaxSimultaneousTextures >= 2 && data->shaders[SHADER_PALETTE]) {
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_INDEX8);
}
#ifdef SDL_HAVE_YUV
if (caps.MaxSimultaneousTextures >= 3) {
int i;
for (i = SHADER_NONE + 1; i < SDL_arraysize(data->shaders); ++i) {
result = D3D9_CreatePixelShader(data->device, (D3D9_Shader)i, &data->shaders[i]);
if (FAILED(result)) {
D3D_SetError("CreatePixelShader()", result);
}
}
if (data->shaders[SHADER_YUV]) {
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_YV12);
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_IYUV);
}
if (caps.MaxSimultaneousTextures >= 3 && data->shaders[SHADER_YUV]) {
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_YV12);
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_IYUV);
}
#endif

View File

@@ -30,12 +30,17 @@
// The shaders here were compiled with compile_shaders.bat
#define g_ps20_main D3D9_PixelShader_Palette
#include "D3D9_PixelShader_Palette.h"
#undef g_ps20_main
#define g_ps20_main D3D9_PixelShader_YUV
#include "D3D9_PixelShader_YUV.h"
#undef g_ps20_main
// Table of compiled pixel shader bytecode blobs. The first slot is NULL (no
// bytecode), followed by the palette and YUV shaders pulled in by the includes
// above under their renamed symbols.
// NOTE(review): presumably indexed by D3D9_Shader value, so the entry order
// must match the enum -- confirm against D3D9_CreatePixelShader().
static const BYTE *D3D9_shaders[] = {
NULL,
D3D9_PixelShader_Palette,
D3D9_PixelShader_YUV
};
// Break the build if the table and NUM_SHADERS ever disagree in length.
SDL_COMPILE_TIME_ASSERT(D3D9_shaders, SDL_arraysize(D3D9_shaders) == NUM_SHADERS);

View File

@@ -25,6 +25,7 @@
// Identifiers for the pixel shaders known to the Direct3D 9 renderer.
// Values are consecutive starting at zero; NUM_SHADERS must stay last so it
// equals the number of identifiers.
typedef enum
{
    SHADER_NONE = 0, // no pixel shader selected
    SHADER_PALETTE,  // palette lookup shader
    SHADER_YUV,      // YUV shader
    NUM_SHADERS      // count of the identifiers above, not a shader itself
} D3D9_Shader;

View File

@@ -1 +1,2 @@
rem Compile each pixel shader source for the ps_2_0 target (/T) and write the
rem result as a C header (/Fh) with the same base name as the .hlsl file.
fxc /T ps_2_0 /Fh D3D9_PixelShader_Palette.h D3D9_PixelShader_Palette.hlsl
fxc /T ps_2_0 /Fh D3D9_PixelShader_YUV.h D3D9_PixelShader_YUV.hlsl