From de5e0f17848445d626abc5083efcc15eb79399d0 Mon Sep 17 00:00:00 2001 From: Caleb Cornett Date: Sat, 31 Jan 2026 14:50:11 -0500 Subject: [PATCH] GPU: Query UnrestrictedBufferTextureCopyPitchSupported to avoid D3D12 realignment copies (cherry picked from commit 742a6fd092836796d26a33590ef57d852142bf14) --- include/SDL3/SDL_gpu.h | 11 +++--- src/gpu/d3d12/SDL_gpu_d3d12.c | 63 +++++++++++++++++++++++++---------- 2 files changed, 52 insertions(+), 22 deletions(-) diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h index 2fe19429df..d1e95f6750 100644 --- a/include/SDL3/SDL_gpu.h +++ b/include/SDL3/SDL_gpu.h @@ -1398,11 +1398,12 @@ typedef struct SDL_GPUViewport * SDL_DownloadFromGPUTexture are used as default values respectively and data * is considered to be tightly packed. * - * **WARNING**: Direct3D 12 requires texture data row pitch to be 256 byte - * aligned, and offsets to be aligned to 512 bytes. If they are not, SDL will - * make a temporary copy of the data that is properly aligned, but this adds - * overhead to the transfer process. Apps can avoid this by aligning their - * data appropriately, or using a different GPU backend than Direct3D 12. + * **WARNING**: On some older/integrated hardware, Direct3D 12 requires texture + * data row pitch to be 256 byte aligned, and offsets to be aligned to 512 bytes. + * If they are not, SDL will make a temporary copy of the data that is properly + * aligned, but this adds overhead to the transfer process. Apps can avoid this + * by aligning their data appropriately, or using a different GPU backend than + * Direct3D 12. * * \since This struct is available since SDL 3.2.0. 
* diff --git a/src/gpu/d3d12/SDL_gpu_d3d12.c b/src/gpu/d3d12/SDL_gpu_d3d12.c index 278452294b..52c0b66dff 100644 --- a/src/gpu/d3d12/SDL_gpu_d3d12.c +++ b/src/gpu/d3d12/SDL_gpu_d3d12.c @@ -897,6 +897,7 @@ struct D3D12Renderer bool debug_mode; bool GPUUploadHeapSupported; + bool UnrestrictedBufferTextureCopyPitchSupported; // FIXME: these might not be necessary since we're not using custom heaps bool UMA; bool UMACacheCoherent; @@ -5914,6 +5915,7 @@ static void D3D12_UploadToTexture( bool cycle) { D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer; + D3D12Renderer *renderer = (D3D12Renderer *)d3d12CommandBuffer->renderer; D3D12BufferContainer *transferBufferContainer = (D3D12BufferContainer *)source->transfer_buffer; D3D12Buffer *temporaryBuffer = NULL; D3D12_TEXTURE_COPY_LOCATION sourceLocation; @@ -5941,11 +5943,12 @@ static void D3D12_UploadToTexture( cycle, D3D12_RESOURCE_STATE_COPY_DEST); - /* D3D12 requires texture data row pitch to be 256 byte aligned, which is obviously insane. - * Instead of exposing that restriction to the client, which is a huge rake to step on, - * and a restriction that no other backend requires, we're going to copy data to a temporary buffer, - * copy THAT data to the texture, and then get rid of the temporary buffer ASAP. - * If we're lucky and the row pitch and depth pitch are already aligned, we can skip all of that. + /* Unless the UnrestrictedBufferTextureCopyPitchSupported feature is supported, D3D12 requires + * texture data row pitch to be 256 byte aligned, which is obviously insane. Instead of exposing + * that restriction to the client, which is a huge rake to step on, and a restriction that no + * other backend requires, we're going to copy data to a temporary buffer, copy THAT data to the + * texture, and then get rid of the temporary buffer ASAP. If we're lucky and the row pitch and + * depth pitch are already aligned, we can skip all of that. 
* * D3D12 also requires offsets to be 512 byte aligned. We'll fix that for the client and warn them as well. * @@ -5967,10 +5970,16 @@ static void D3D12_UploadToTexture( bytesPerSlice = rowsPerSlice * rowPitch; - alignedRowPitch = (destination->w + (blockWidth - 1)) / blockWidth * blockSize; - alignedRowPitch = D3D12_INTERNAL_Align(alignedRowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - needsRealignment = rowsPerSlice != destination->h || rowPitch != alignedRowPitch; - needsPlacementCopy = source->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0; + if (renderer->UnrestrictedBufferTextureCopyPitchSupported) { + alignedRowPitch = rowPitch; + needsRealignment = false; + needsPlacementCopy = false; + } else { + alignedRowPitch = (destination->w + (blockWidth - 1)) / blockWidth * blockSize; + alignedRowPitch = D3D12_INTERNAL_Align(alignedRowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + needsRealignment = rowsPerSlice != destination->h || rowPitch != alignedRowPitch; + needsPlacementCopy = source->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0; + } alignedBytesPerSlice = alignedRowPitch * destination->h; @@ -6249,6 +6258,7 @@ static void D3D12_DownloadFromTexture( const SDL_GPUTextureTransferInfo *destination) { D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer; + D3D12Renderer *renderer = d3d12CommandBuffer->renderer; D3D12_TEXTURE_COPY_LOCATION sourceLocation; D3D12_TEXTURE_COPY_LOCATION destinationLocation; Uint32 pixelsPerRow = destination->pixels_per_row; @@ -6266,11 +6276,12 @@ static void D3D12_DownloadFromTexture( D3D12BufferContainer *destinationContainer = (D3D12BufferContainer *)destination->transfer_buffer; D3D12Buffer *destinationBuffer = destinationContainer->activeBuffer; - /* D3D12 requires texture data row pitch to be 256 byte aligned, which is obviously insane. 
- * Instead of exposing that restriction to the client, which is a huge rake to step on, - * and a restriction that no other backend requires, we're going to copy data to a temporary buffer, - * copy THAT data to the texture, and then get rid of the temporary buffer ASAP. - * If we're lucky and the row pitch and depth pitch are already aligned, we can skip all of that. + /* Unless the UnrestrictedBufferTextureCopyPitchSupported feature is available, D3D12 requires + * texture data row pitch to be 256 byte aligned, which is obviously insane. Instead of exposing + * that restriction to the client, which is a huge rake to step on, and a restriction that no + * other backend requires, we're going to copy the texture data to a temporary buffer, copy THAT + * data to the destination transfer buffer, and then get rid of the temporary buffer ASAP. If we're + * lucky and the row pitch and depth pitch are already aligned, we can skip all of that. * * D3D12 also requires offsets to be 512 byte aligned. We'll fix that for the client and warn them as well. 
* @@ -6290,9 +6301,15 @@ static void D3D12_DownloadFromTexture( rowsPerSlice = source->h; } - alignedRowPitch = D3D12_INTERNAL_Align(rowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - needsRealignment = rowsPerSlice != source->h || rowPitch != alignedRowPitch; - needsPlacementCopy = destination->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0; + if (renderer->UnrestrictedBufferTextureCopyPitchSupported) { + alignedRowPitch = rowPitch; + needsRealignment = false; + needsPlacementCopy = false; + } else { + alignedRowPitch = D3D12_INTERNAL_Align(rowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + needsRealignment = rowsPerSlice != source->h || rowPitch != alignedRowPitch; + needsPlacementCopy = destination->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0; + } sourceLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; sourceLocation.SubresourceIndex = sourceSubresource->index; @@ -9149,6 +9166,18 @@ static SDL_GPUDevice *D3D12_CreateDevice(bool debugMode, bool preferLowPower, SD } #endif + // Check for unrestricted texture-buffer copy pitch support + D3D12_FEATURE_DATA_D3D12_OPTIONS13 options13; + res = ID3D12Device_CheckFeatureSupport( + renderer->device, + D3D12_FEATURE_D3D12_OPTIONS13, + &options13, + sizeof(options13)); + + if (SUCCEEDED(res)) { + renderer->UnrestrictedBufferTextureCopyPitchSupported = options13.UnrestrictedBufferTextureCopyPitchSupported; + } + // Create command queue #if (defined(SDL_PLATFORM_XBOXONE) || defined(SDL_PLATFORM_XBOXSERIES)) if (s_CommandQueue != NULL) {