Added an advanced shader for the GPU renderer

This allows us to support YUV colorspaces and HDR in the GPU renderer.

Fixes https://github.com/libsdl-org/SDL/issues/11281
Fixes https://github.com/libsdl-org/SDL/issues/11282
This commit is contained in:
Sam Lantinga
2025-09-29 21:23:15 -07:00
parent fe3b399b7e
commit 9735c45db6
42 changed files with 5205 additions and 3589 deletions

View File

@@ -35,13 +35,51 @@ typedef struct GPU_VertexShaderUniformData
SDL_FColor color;
} GPU_VertexShaderUniformData;
typedef struct GPU_FragmentShaderUniformData
typedef struct GPU_SimpleFragmentShaderUniformData
{
float color_scale;
} GPU_SimpleFragmentShaderUniformData;
typedef struct GPU_AdvancedFragmentShaderUniformData
{
float scRGB_output;
float texture_type;
float input_type;
float color_scale;
float texel_width;
float texel_height;
float texture_width;
float texture_height;
} GPU_FragmentShaderUniformData;
float tonemap_method;
float tonemap_factor1;
float tonemap_factor2;
float sdr_white_point;
float YCbCr_matrix[16];
} GPU_AdvancedFragmentShaderUniformData;
// These should mirror the definitions in shaders/texture_advanced.frag.hlsl
static const float TONEMAP_NONE = 0;
//static const float TONEMAP_LINEAR = 1;
static const float TONEMAP_CHROME = 2;
//static const float TEXTURETYPE_NONE = 0;
static const float TEXTURETYPE_RGB = 1;
static const float TEXTURETYPE_RGB_PIXELART = 2;
static const float TEXTURETYPE_RGBA = 3;
static const float TEXTURETYPE_RGBA_PIXELART = 4;
static const float TEXTURETYPE_PALETTE = 5;
static const float TEXTURETYPE_PALETTE_PIXELART = 6;
static const float TEXTURETYPE_NV12 = 7;
static const float TEXTURETYPE_NV21 = 8;
static const float TEXTURETYPE_YUV = 9;
static const float INPUTTYPE_UNSPECIFIED = 0;
static const float INPUTTYPE_SRGB = 1;
static const float INPUTTYPE_SCRGB = 2;
static const float INPUTTYPE_HDR10 = 3;
typedef struct GPU_RenderData
{
@@ -95,10 +133,20 @@ typedef struct GPU_TextureData
{
SDL_GPUTexture *texture;
SDL_GPUTextureFormat format;
GPU_FragmentShaderID shader;
void *pixels;
int pitch;
SDL_Rect locked_rect;
const float *YCbCr_matrix;
#ifdef SDL_HAVE_YUV
// YV12 texture support
bool yuv;
SDL_GPUTexture *textureU;
SDL_GPUTexture *textureV;
// NV12 texture support
bool nv12;
SDL_GPUTexture *textureNV;
#endif
} GPU_TextureData;
static bool GPU_SupportsBlendMode(SDL_Renderer *renderer, SDL_BlendMode blendMode)
@@ -211,10 +259,32 @@ static bool GPU_CreateTexture(SDL_Renderer *renderer, SDL_Texture *texture, SDL_
SDL_GPUTextureFormat format;
SDL_GPUTextureUsageFlags usage = SDL_GPU_TEXTUREUSAGE_SAMPLER;
if (texture->format == SDL_PIXELFORMAT_INDEX8) {
switch (texture->format) {
case SDL_PIXELFORMAT_INDEX8:
case SDL_PIXELFORMAT_YV12:
case SDL_PIXELFORMAT_IYUV:
case SDL_PIXELFORMAT_NV12:
case SDL_PIXELFORMAT_NV21:
format = SDL_GPU_TEXTUREFORMAT_R8_UNORM;
} else {
break;
case SDL_PIXELFORMAT_P010:
format = SDL_GPU_TEXTUREFORMAT_R16_UNORM;
break;
default:
format = SDL_GetGPUTextureFormatFromPixelFormat(texture->format);
break;
}
if (renderer->output_colorspace == SDL_COLORSPACE_SRGB_LINEAR) {
switch (format) {
case SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM:
format = SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM_SRGB;
break;
case SDL_GPU_TEXTUREFORMAT_B8G8R8A8_UNORM:
format = SDL_GPU_TEXTUREFORMAT_B8G8R8A8_UNORM_SRGB;
break;
default:
break;
}
}
if (format == SDL_GPU_TEXTUREFORMAT_INVALID) {
return SDL_SetError("Texture format %s not supported by SDL_GPU",
@@ -236,7 +306,8 @@ static bool GPU_CreateTexture(SDL_Renderer *renderer, SDL_Texture *texture, SDL_
size += 2 * ((texture->h + 1) / 2) * ((data->pitch + 1) / 2);
}
if (texture->format == SDL_PIXELFORMAT_NV12 ||
texture->format == SDL_PIXELFORMAT_NV21) {
texture->format == SDL_PIXELFORMAT_NV21 ||
texture->format == SDL_PIXELFORMAT_P010) {
// Need to add size for the U/V plane
size += 2 * ((texture->h + 1) / 2) * ((data->pitch + 1) / 2);
}
@@ -273,26 +344,74 @@ static bool GPU_CreateTexture(SDL_Renderer *renderer, SDL_Texture *texture, SDL_
SDL_PropertiesID props = SDL_GetTextureProperties(texture);
SDL_SetPointerProperty(props, SDL_PROP_TEXTURE_GPU_TEXTURE_POINTER, data->texture);
if (texture->format == SDL_PIXELFORMAT_RGBA32 || texture->format == SDL_PIXELFORMAT_BGRA32) {
data->shader = FRAG_SHADER_TEXTURE_RGBA;
} else {
data->shader = FRAG_SHADER_TEXTURE_RGB;
}
#ifdef SDL_HAVE_YUV
if (texture->format == SDL_PIXELFORMAT_YV12 ||
texture->format == SDL_PIXELFORMAT_IYUV) {
data->yuv = true;
tci.width = (tci.width + 1) / 2;
tci.height = (tci.height + 1) / 2;
data->textureU = SDL_CreateGPUTexture(renderdata->device, &tci);
if (!data->textureU) {
return false;
}
SDL_SetPointerProperty(props, SDL_PROP_TEXTURE_GPU_TEXTURE_U_POINTER, data->textureU);
data->textureV = SDL_CreateGPUTexture(renderdata->device, &tci);
if (!data->textureV) {
return false;
}
SDL_SetPointerProperty(props, SDL_PROP_TEXTURE_GPU_TEXTURE_V_POINTER, data->textureU);
data->YCbCr_matrix = SDL_GetYCbCRtoRGBConversionMatrix(texture->colorspace, texture->w, texture->h, 8);
if (!data->YCbCr_matrix) {
return SDL_SetError("Unsupported YUV colorspace");
}
}
if (texture->format == SDL_PIXELFORMAT_NV12 ||
texture->format == SDL_PIXELFORMAT_NV21 ||
texture->format == SDL_PIXELFORMAT_P010) {
int bits_per_pixel;
data->nv12 = true;
tci.width = ((tci.width + 1) / 2);
tci.height = ((tci.height + 1) / 2);
if (texture->format == SDL_PIXELFORMAT_P010) {
tci.format = SDL_GPU_TEXTUREFORMAT_R16G16_UNORM;
} else {
tci.format = SDL_GPU_TEXTUREFORMAT_R8G8_UNORM;
}
data->textureNV = SDL_CreateGPUTexture(renderdata->device, &tci);
if (!data->textureNV) {
return false;
}
SDL_SetPointerProperty(props, SDL_PROP_TEXTURE_GPU_TEXTURE_UV_POINTER, data->textureNV);
switch (texture->format) {
case SDL_PIXELFORMAT_P010:
bits_per_pixel = 10;
break;
default:
bits_per_pixel = 8;
break;
}
data->YCbCr_matrix = SDL_GetYCbCRtoRGBConversionMatrix(texture->colorspace, texture->w, texture->h, bits_per_pixel);
if (!data->YCbCr_matrix) {
return SDL_SetError("Unsupported YUV colorspace");
}
}
#endif // SDL_HAVE_YUV
return true;
}
static bool GPU_UpdateTexture(SDL_Renderer *renderer, SDL_Texture *texture,
const SDL_Rect *rect, const void *pixels, int pitch)
static bool GPU_UpdateTextureInternal(GPU_RenderData *renderdata, SDL_GPUCopyPass *cpass, SDL_GPUTexture *texture, int bpp, int x, int y, int w, int h, const void *pixels, int pitch)
{
GPU_RenderData *renderdata = (GPU_RenderData *)renderer->internal;
GPU_TextureData *data = (GPU_TextureData *)texture->internal;
const Uint32 texturebpp = SDL_BYTESPERPIXEL(texture->format);
size_t row_size, data_size;
if (!SDL_size_mul_check_overflow(rect->w, texturebpp, &row_size) ||
!SDL_size_mul_check_overflow(rect->h, row_size, &data_size)) {
if (!SDL_size_mul_check_overflow(w, bpp, &row_size) ||
!SDL_size_mul_check_overflow(h, row_size, &data_size)) {
return SDL_SetError("update size overflow");
}
@@ -302,54 +421,144 @@ static bool GPU_UpdateTexture(SDL_Renderer *renderer, SDL_Texture *texture,
tbci.usage = SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD;
SDL_GPUTransferBuffer *tbuf = SDL_CreateGPUTransferBuffer(renderdata->device, &tbci);
if (tbuf == NULL) {
return false;
}
Uint8 *output = SDL_MapGPUTransferBuffer(renderdata->device, tbuf, false);
if (!output) {
return false;
}
if ((size_t)pitch == row_size) {
SDL_memcpy(output, pixels, data_size);
} else {
// FIXME is negative pitch supposed to work?
// If not, maybe use SDL_GPUTextureTransferInfo::pixels_per_row instead of this
const Uint8 *input = pixels;
for (int i = 0; i < rect->h; ++i) {
for (int i = 0; i < h; ++i) {
SDL_memcpy(output, input, row_size);
output += row_size;
input += pitch;
}
}
SDL_UnmapGPUTransferBuffer(renderdata->device, tbuf);
SDL_GPUCommandBuffer *cbuf = renderdata->state.command_buffer;
SDL_GPUCopyPass *cpass = SDL_BeginGPUCopyPass(cbuf);
SDL_GPUTextureTransferInfo tex_src;
SDL_zero(tex_src);
tex_src.transfer_buffer = tbuf;
tex_src.rows_per_layer = rect->h;
tex_src.pixels_per_row = rect->w;
tex_src.rows_per_layer = h;
tex_src.pixels_per_row = w;
SDL_GPUTextureRegion tex_dst;
SDL_zero(tex_dst);
tex_dst.texture = data->texture;
tex_dst.x = rect->x;
tex_dst.y = rect->y;
tex_dst.w = rect->w;
tex_dst.h = rect->h;
tex_dst.texture = texture;
tex_dst.x = x;
tex_dst.y = y;
tex_dst.w = w;
tex_dst.h = h;
tex_dst.d = 1;
SDL_UploadToGPUTexture(cpass, &tex_src, &tex_dst, false);
SDL_EndGPUCopyPass(cpass);
SDL_ReleaseGPUTransferBuffer(renderdata->device, tbuf);
return true;
}
#ifdef SDL_HAVE_YUV
static bool GPU_UpdateTextureNV(SDL_Renderer *renderer, SDL_Texture *texture,
const SDL_Rect *rect,
const Uint8 *Yplane, int Ypitch,
const Uint8 *UVplane, int UVpitch);
static bool GPU_UpdateTextureYUV(SDL_Renderer *renderer, SDL_Texture *texture,
const SDL_Rect *rect,
const Uint8 *Yplane, int Ypitch,
const Uint8 *Uplane, int Upitch,
const Uint8 *Vplane, int Vpitch);
#endif
static bool GPU_UpdateTexture(SDL_Renderer *renderer, SDL_Texture *texture, const SDL_Rect *rect, const void *pixels, int pitch)
{
GPU_RenderData *renderdata = (GPU_RenderData *)renderer->internal;
GPU_TextureData *data = (GPU_TextureData *)texture->internal;
bool retval = true;
SDL_GPUCommandBuffer *cbuf = renderdata->state.command_buffer;
SDL_GPUCopyPass *cpass = SDL_BeginGPUCopyPass(cbuf);
int bpp = SDL_BYTESPERPIXEL(texture->format);
retval = GPU_UpdateTextureInternal(renderdata, cpass, data->texture, bpp, rect->x, rect->y, rect->w, rect->h, pixels, pitch);
#ifdef SDL_HAVE_YUV
if (data->nv12) {
const Uint8 *Yplane = (const Uint8 *)pixels;
const Uint8 *UVplane = Yplane + rect->h * pitch;
int UVpitch;
bpp *= 2;
if (texture->format == SDL_PIXELFORMAT_P010) {
UVpitch = (pitch + 3) & ~3;
} else {
bpp = 1;
UVpitch = (pitch + 1) & ~1;
}
retval &= GPU_UpdateTextureInternal(renderdata, cpass, data->textureNV, bpp, rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, UVplane, UVpitch);
} else if (data->yuv) {
int Ypitch = pitch;
int UVpitch = ((Ypitch + 1) / 2);
const Uint8 *Yplane = (const Uint8 *)pixels;
const Uint8 *Uplane = Yplane + rect->h * Ypitch;
const Uint8 *Vplane = Uplane + ((rect->h + 1) / 2) * UVpitch;
retval &= GPU_UpdateTextureInternal(renderdata, cpass, data->textureU, bpp, rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, Uplane, UVpitch);
retval &= GPU_UpdateTextureInternal(renderdata, cpass, data->textureV, bpp, rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, Vplane, UVpitch);
}
#endif
SDL_EndGPUCopyPass(cpass);
return retval;
}
#ifdef SDL_HAVE_YUV
static bool GPU_UpdateTextureYUV(SDL_Renderer *renderer, SDL_Texture *texture,
const SDL_Rect *rect,
const Uint8 *Yplane, int Ypitch,
const Uint8 *Uplane, int Upitch,
const Uint8 *Vplane, int Vpitch)
{
GPU_RenderData *renderdata = (GPU_RenderData *)renderer->internal;
GPU_TextureData *data = (GPU_TextureData *)texture->internal;
int bpp = SDL_BYTESPERPIXEL(texture->format);
bool retval = true;
SDL_GPUCommandBuffer *cbuf = renderdata->state.command_buffer;
SDL_GPUCopyPass *cpass = SDL_BeginGPUCopyPass(cbuf);
retval &= GPU_UpdateTextureInternal(renderdata, cpass, data->texture, bpp, rect->x, rect->y, rect->w, rect->h, Yplane, Ypitch);
retval &= GPU_UpdateTextureInternal(renderdata, cpass, data->textureU, bpp, rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, Uplane, Upitch);
retval &= GPU_UpdateTextureInternal(renderdata, cpass, data->textureV, bpp, rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, Vplane, Vpitch);
SDL_EndGPUCopyPass(cpass);
return retval;
}
static bool GPU_UpdateTextureNV(SDL_Renderer *renderer, SDL_Texture *texture,
const SDL_Rect *rect,
const Uint8 *Yplane, int Ypitch,
const Uint8 *UVplane, int UVpitch)
{
GPU_RenderData *renderdata = (GPU_RenderData *)renderer->internal;
GPU_TextureData *data = (GPU_TextureData *)texture->internal;
int bpp = SDL_BYTESPERPIXEL(texture->format);
bool retval = true;
SDL_GPUCommandBuffer *cbuf = renderdata->state.command_buffer;
SDL_GPUCopyPass *cpass = SDL_BeginGPUCopyPass(cbuf);
retval &= GPU_UpdateTextureInternal(renderdata, cpass, data->texture, bpp, rect->x, rect->y, rect->w, rect->h, Yplane, Ypitch);
bpp *= 2;
retval &= GPU_UpdateTextureInternal(renderdata, cpass, data->textureNV, bpp, rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, UVplane, UVpitch);
SDL_EndGPUCopyPass(cpass);
return retval;
}
#endif // SDL_HAVE_YUV
static bool GPU_LockTexture(SDL_Renderer *renderer, SDL_Texture *texture,
const SDL_Rect *rect, void **pixels, int *pitch)
{
@@ -431,7 +640,6 @@ static bool GPU_QueueGeometry(SDL_Renderer *renderer, SDL_RenderCommand *cmd, SD
int count = indices ? num_indices : num_vertices;
float *verts;
size_t sz = 2 * sizeof(float) + 4 * sizeof(float) + (texture ? 2 : 0) * sizeof(float);
const float color_scale = cmd->data.draw.color_scale;
bool convert_color = SDL_RenderingLinearSpace(renderer);
verts = (float *)SDL_AllocateRenderVertices(renderer, count * sz, 0, &cmd->data.draw.first);
@@ -466,11 +674,9 @@ static bool GPU_QueueGeometry(SDL_Renderer *renderer, SDL_RenderCommand *cmd, SD
SDL_ConvertToLinear(&col_);
}
// FIXME: The Vulkan backend doesn't multiply by color_scale. GL does. I'm not sure which one is wrong.
// ANSWER: The color scale should be applied in linear space when using the scRGB colorspace. This is done in shaders in the Vulkan backend.
*(verts++) = col_.r * color_scale;
*(verts++) = col_.g * color_scale;
*(verts++) = col_.b * color_scale;
*(verts++) = col_.r;
*(verts++) = col_.g;
*(verts++) = col_.b;
*(verts++) = col_.a;
if (texture) {
@@ -524,21 +730,6 @@ static void PushVertexUniforms(GPU_RenderData *data, SDL_RenderCommand *cmd)
SDL_PushGPUVertexUniformData(data->state.command_buffer, 0, &uniforms, sizeof(uniforms));
}
static void PushFragmentUniforms(GPU_RenderData *data, SDL_RenderCommand *cmd)
{
if (cmd->data.draw.texture &&
cmd->data.draw.texture_scale_mode == SDL_SCALEMODE_PIXELART) {
SDL_Texture *texture = cmd->data.draw.texture;
GPU_FragmentShaderUniformData uniforms;
SDL_zero(uniforms);
uniforms.texture_width = texture->w;
uniforms.texture_height = texture->h;
uniforms.texel_width = 1.0f / uniforms.texture_width;
uniforms.texel_height = 1.0f / uniforms.texture_height;
SDL_PushGPUFragmentUniformData(data->state.command_buffer, 0, &uniforms, sizeof(uniforms));
}
}
static void SetViewportAndScissor(GPU_RenderData *data)
{
SDL_SetGPUViewport(data->state.render_pass, &data->state.viewport);
@@ -609,6 +800,104 @@ static SDL_GPUSampler *GetSampler(GPU_RenderData *data, SDL_ScaleMode scale_mode
return data->samplers[key];
}
static void CalculateAdvancedShaderConstants(SDL_Renderer *renderer, const SDL_RenderCommand *cmd, const SDL_Texture *texture, GPU_AdvancedFragmentShaderUniformData *constants)
{
float output_headroom;
SDL_zerop(constants);
constants->scRGB_output = (float)SDL_RenderingLinearSpace(renderer);
constants->color_scale = cmd->data.draw.color_scale;
switch (texture->format) {
case SDL_PIXELFORMAT_YV12:
case SDL_PIXELFORMAT_IYUV:
constants->texture_type = TEXTURETYPE_YUV;
constants->input_type = INPUTTYPE_SRGB;
break;
case SDL_PIXELFORMAT_NV12:
constants->texture_type = TEXTURETYPE_NV12;
constants->input_type = INPUTTYPE_SRGB;
break;
case SDL_PIXELFORMAT_NV21:
constants->texture_type = TEXTURETYPE_NV21;
constants->input_type = INPUTTYPE_SRGB;
break;
case SDL_PIXELFORMAT_P010:
constants->texture_type = TEXTURETYPE_NV12;
constants->input_type = INPUTTYPE_HDR10;
break;
default:
switch (texture->format) {
case SDL_PIXELFORMAT_INDEX8:
if (cmd->data.draw.texture_scale_mode == SDL_SCALEMODE_PIXELART) {
constants->texture_type = TEXTURETYPE_PALETTE_PIXELART;
} else {
constants->texture_type = TEXTURETYPE_PALETTE;
}
break;
case SDL_PIXELFORMAT_BGRX32:
case SDL_PIXELFORMAT_RGBX32:
if (cmd->data.draw.texture_scale_mode == SDL_SCALEMODE_PIXELART) {
constants->texture_type = TEXTURETYPE_RGB_PIXELART;
} else {
constants->texture_type = TEXTURETYPE_RGB;
}
break;
case SDL_PIXELFORMAT_BGRA32:
case SDL_PIXELFORMAT_RGBA32:
case SDL_PIXELFORMAT_ABGR2101010:
case SDL_PIXELFORMAT_RGBA64_FLOAT:
if (cmd->data.draw.texture_scale_mode == SDL_SCALEMODE_PIXELART) {
constants->texture_type = TEXTURETYPE_RGBA_PIXELART;
} else {
constants->texture_type = TEXTURETYPE_RGBA;
}
break;
default:
SDL_assert(!"Unhandled pixel format");
break;
}
if (texture->colorspace == SDL_COLORSPACE_SRGB_LINEAR) {
constants->input_type = INPUTTYPE_SCRGB;
} else if (texture->colorspace == SDL_COLORSPACE_HDR10) {
constants->input_type = INPUTTYPE_HDR10;
} else {
// The sampler will convert from sRGB to linear on load if working in linear colorspace
constants->input_type = INPUTTYPE_UNSPECIFIED;
}
break;
}
if (cmd->data.draw.texture_scale_mode == SDL_SCALEMODE_PIXELART) {
constants->texture_width = texture->w;
constants->texture_height = texture->h;
constants->texel_width = 1.0f / constants->texture_width;
constants->texel_height = 1.0f / constants->texture_height;
}
constants->sdr_white_point = texture->SDR_white_point;
if (renderer->target) {
output_headroom = renderer->target->HDR_headroom;
} else {
output_headroom = renderer->HDR_headroom;
}
if (texture->HDR_headroom > output_headroom) {
constants->tonemap_method = TONEMAP_CHROME;
constants->tonemap_factor1 = (output_headroom / (texture->HDR_headroom * texture->HDR_headroom));
constants->tonemap_factor2 = (1.0f / output_headroom);
}
#ifdef SDL_HAVE_YUV
GPU_TextureData *data = (GPU_TextureData *)texture->internal;
if (data->yuv || data->nv12) {
SDL_memcpy(constants->YCbCr_matrix, data->YCbCr_matrix, sizeof(constants->YCbCr_matrix));
}
#endif
}
static void Draw(
GPU_RenderData *data, SDL_RenderCommand *cmd,
Uint32 num_verts,
@@ -624,29 +913,26 @@ static void Draw(
SDL_GPUShader *custom_frag_shader = custom_state ? custom_state->fragment_shader : NULL;
GPU_VertexShaderID v_shader;
GPU_FragmentShaderID f_shader;
GPU_SimpleFragmentShaderUniformData simple_constants = { cmd->data.draw.color_scale };
GPU_AdvancedFragmentShaderUniformData advanced_constants;
if (prim == SDL_GPU_PRIMITIVETYPE_TRIANGLELIST) {
SDL_Texture *texture = cmd->data.draw.texture;
if (texture) {
v_shader = VERT_SHADER_TRI_TEXTURE;
if (texture->format == SDL_PIXELFORMAT_INDEX8) {
if (cmd->data.draw.texture_scale_mode == SDL_SCALEMODE_PIXELART) {
f_shader = FRAG_SHADER_TEXTURE_PALETTE_PIXELART;
} else {
f_shader = FRAG_SHADER_TEXTURE_PALETTE;
}
} else if (texture->format == SDL_PIXELFORMAT_RGBA32 || texture->format == SDL_PIXELFORMAT_BGRA32) {
if (cmd->data.draw.texture_scale_mode == SDL_SCALEMODE_PIXELART) {
f_shader = FRAG_SHADER_TEXTURE_RGBA_PIXELART;
} else {
CalculateAdvancedShaderConstants(texture->renderer, cmd, texture, &advanced_constants);
if ((advanced_constants.texture_type == TEXTURETYPE_RGB ||
advanced_constants.texture_type == TEXTURETYPE_RGBA) &&
advanced_constants.input_type == INPUTTYPE_UNSPECIFIED &&
advanced_constants.tonemap_method == TONEMAP_NONE) {
if (texture->format == SDL_PIXELFORMAT_RGBA32 || texture->format == SDL_PIXELFORMAT_BGRA32) {
f_shader = FRAG_SHADER_TEXTURE_RGBA;
}
} else {
if (cmd->data.draw.texture_scale_mode == SDL_SCALEMODE_PIXELART) {
f_shader = FRAG_SHADER_TEXTURE_RGB_PIXELART;
} else {
f_shader = FRAG_SHADER_TEXTURE_RGB;
}
} else {
f_shader = FRAG_SHADER_TEXTURE_ADVANCED;
}
} else {
v_shader = VERT_SHADER_TRI_COLOR;
@@ -693,12 +979,29 @@ static void Draw(
sampler_bind.texture = tdata->texture;
SDL_BindGPUFragmentSamplers(pass, sampler_slot++, &sampler_bind, 1);
if (texture->palette) {
GPU_PaletteData *palette = (GPU_PaletteData *)texture->palette->internal;
if (f_shader == FRAG_SHADER_TEXTURE_ADVANCED) {
if (texture->palette) {
GPU_PaletteData *palette = (GPU_PaletteData *)texture->palette->internal;
sampler_bind.sampler = GetSampler(data, SDL_SCALEMODE_NEAREST, SDL_TEXTURE_ADDRESS_CLAMP, SDL_TEXTURE_ADDRESS_CLAMP);
sampler_bind.texture = palette->texture;
SDL_BindGPUFragmentSamplers(pass, sampler_slot++, &sampler_bind, 1);
sampler_bind.sampler = GetSampler(data, SDL_SCALEMODE_NEAREST, SDL_TEXTURE_ADDRESS_CLAMP, SDL_TEXTURE_ADDRESS_CLAMP);
sampler_bind.texture = palette->texture;
SDL_BindGPUFragmentSamplers(pass, sampler_slot++, &sampler_bind, 1);
#ifdef SDL_HAVE_YUV
} else if (tdata->yuv) {
sampler_bind.texture = tdata->textureU;
SDL_BindGPUFragmentSamplers(pass, sampler_slot++, &sampler_bind, 1);
sampler_bind.texture = tdata->textureV;
SDL_BindGPUFragmentSamplers(pass, sampler_slot++, &sampler_bind, 1);
} else if (tdata->nv12) {
sampler_bind.texture = tdata->textureNV;
SDL_BindGPUFragmentSamplers(pass, sampler_slot++, &sampler_bind, 1);
#endif
}
// We need to fill 3 sampler slots for the advanced shader
while (sampler_slot < 3) {
SDL_BindGPUFragmentSamplers(pass, sampler_slot++, &sampler_bind, 1);
}
}
}
if (custom_state) {
@@ -718,7 +1021,11 @@ static void Draw(
}
}
} else {
PushFragmentUniforms(data, cmd);
if (f_shader == FRAG_SHADER_TEXTURE_ADVANCED) {
SDL_PushGPUFragmentUniformData(data->state.command_buffer, 0, &advanced_constants, sizeof(advanced_constants));
} else {
SDL_PushGPUFragmentUniformData(data->state.command_buffer, 0, &simple_constants, sizeof(simple_constants));
}
}
SDL_GPUBufferBinding buffer_bind;
@@ -1160,6 +1467,11 @@ static void GPU_DestroyTexture(SDL_Renderer *renderer, SDL_Texture *texture)
}
SDL_ReleaseGPUTexture(renderdata->device, data->texture);
#ifdef SDL_HAVE_YUV
SDL_ReleaseGPUTexture(renderdata->device, data->textureU);
SDL_ReleaseGPUTexture(renderdata->device, data->textureV);
SDL_ReleaseGPUTexture(renderdata->device, data->textureNV);
#endif
SDL_free(data->pixels);
SDL_free(data);
texture->internal = NULL;
@@ -1262,8 +1574,9 @@ static bool GPU_CreateRenderer(SDL_Renderer *renderer, SDL_Window *window, SDL_P
SDL_SetupRendererColorspace(renderer, create_props);
if (renderer->output_colorspace != SDL_COLORSPACE_SRGB) {
// TODO
if (renderer->output_colorspace != SDL_COLORSPACE_SRGB &&
renderer->output_colorspace != SDL_COLORSPACE_SRGB_LINEAR &&
renderer->output_colorspace != SDL_COLORSPACE_HDR10) {
return SDL_SetError("Unsupported output colorspace");
}
@@ -1278,6 +1591,10 @@ static bool GPU_CreateRenderer(SDL_Renderer *renderer, SDL_Window *window, SDL_P
renderer->DestroyPalette = GPU_DestroyPalette;
renderer->CreateTexture = GPU_CreateTexture;
renderer->UpdateTexture = GPU_UpdateTexture;
#ifdef SDL_HAVE_YUV
renderer->UpdateTextureYUV = GPU_UpdateTextureYUV;
renderer->UpdateTextureNV = GPU_UpdateTextureNV;
#endif
renderer->LockTexture = GPU_LockTexture;
renderer->UnlockTexture = GPU_UnlockTexture;
renderer->SetRenderTarget = GPU_SetRenderTarget;
@@ -1341,7 +1658,7 @@ static bool GPU_CreateRenderer(SDL_Renderer *renderer, SDL_Window *window, SDL_P
return false;
}
// XXX what's a good initial size?
// FIXME: What's a good initial size?
if (!InitVertexBuffer(data, 1 << 16)) {
return false;
}
@@ -1350,7 +1667,18 @@ static bool GPU_CreateRenderer(SDL_Renderer *renderer, SDL_Window *window, SDL_P
return false;
}
data->swapchain.composition = SDL_GPU_SWAPCHAINCOMPOSITION_SDR;
switch (renderer->output_colorspace) {
case SDL_COLORSPACE_SRGB_LINEAR:
data->swapchain.composition = SDL_GPU_SWAPCHAINCOMPOSITION_HDR_EXTENDED_LINEAR;
break;
case SDL_COLORSPACE_HDR10:
data->swapchain.composition = SDL_GPU_SWAPCHAINCOMPOSITION_HDR10_ST2084;
break;
case SDL_COLORSPACE_SRGB:
default:
data->swapchain.composition = SDL_GPU_SWAPCHAINCOMPOSITION_SDR;
break;
}
data->swapchain.present_mode = SDL_GPU_PRESENTMODE_VSYNC;
int vsync = (int)SDL_GetNumberProperty(create_props, SDL_PROP_RENDERER_CREATE_PRESENT_VSYNC_NUMBER, 0);
@@ -1364,7 +1692,14 @@ static bool GPU_CreateRenderer(SDL_Renderer *renderer, SDL_Window *window, SDL_P
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_RGBA32);
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_BGRX32);
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_RGBX32);
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_ABGR2101010);
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_RGBA64_FLOAT);
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_INDEX8);
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_YV12);
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_IYUV);
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_NV12);
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_NV21);
SDL_AddSupportedTextureFormat(renderer, SDL_PIXELFORMAT_P010);
SDL_SetNumberProperty(SDL_GetRendererProperties(renderer), SDL_PROP_RENDERER_MAX_TEXTURE_SIZE_NUMBER, 16384);