GPU: Simultaneous compute pass read-write (#10965)

This commit is contained in:
Evan Hemsley
2024-09-27 12:49:37 -07:00
committed by GitHub
parent d39acc6d1e
commit 05d0656bd6
8 changed files with 241 additions and 186 deletions

View File

@@ -732,10 +732,10 @@ struct D3D12CommandBuffer
D3D12Sampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D12Texture *computeReadOnlyStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
D3D12Buffer *computeReadOnlyStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
D3D12TextureSubresource *computeWriteOnlyStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
Uint32 computeWriteOnlyStorageTextureSubresourceCount;
D3D12Buffer *computeWriteOnlyStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
Uint32 computeWriteOnlyStorageBufferCount;
D3D12TextureSubresource *computeReadWriteStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
Uint32 computeReadWriteStorageTextureSubresourceCount;
D3D12Buffer *computeReadWriteStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
Uint32 computeReadWriteStorageBufferCount;
D3D12UniformBuffer *computeUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
// Resource tracking
@@ -825,8 +825,8 @@ typedef struct D3D12ComputeRootSignature
Sint32 samplerTextureRootIndex;
Sint32 readOnlyStorageTextureRootIndex;
Sint32 readOnlyStorageBufferRootIndex;
Sint32 writeOnlyStorageTextureRootIndex;
Sint32 writeOnlyStorageBufferRootIndex;
Sint32 readWriteStorageTextureRootIndex;
Sint32 readWriteStorageBufferRootIndex;
Sint32 uniformBufferRootIndex[MAX_UNIFORM_BUFFERS_PER_STAGE];
} D3D12ComputeRootSignature;
@@ -838,8 +838,8 @@ struct D3D12ComputePipeline
Uint32 numSamplers;
Uint32 numReadOnlyStorageTextures;
Uint32 numReadOnlyStorageBuffers;
Uint32 numWriteOnlyStorageTextures;
Uint32 numWriteOnlyStorageBuffers;
Uint32 numReadWriteStorageTextures;
Uint32 numReadWriteStorageBuffers;
Uint32 numUniformBuffers;
SDL_AtomicInt referenceCount;
@@ -1521,13 +1521,17 @@ static void D3D12_INTERNAL_TextureSubresourceBarrier(
D3D12_RESOURCE_STATES destinationState,
D3D12TextureSubresource *textureSubresource)
{
bool needsUAVBarrier =
(textureSubresource->parent->container->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) ||
(textureSubresource->parent->container->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE);
D3D12_INTERNAL_ResourceBarrier(
commandBuffer,
sourceState,
destinationState,
textureSubresource->parent->resource,
textureSubresource->index,
textureSubresource->parent->container->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE);
needsUAVBarrier);
}
static D3D12_RESOURCE_STATES D3D12_INTERNAL_DefaultTextureResourceState(
@@ -1547,6 +1551,8 @@ static D3D12_RESOURCE_STATES D3D12_INTERNAL_DefaultTextureResourceState(
return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
} else if (usageFlags & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) {
return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
} else if (usageFlags & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE) {
return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
} else {
SDL_LogError(SDL_LOG_CATEGORY_GPU, "Texture has no default usage mode!");
return D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE;
@@ -2174,8 +2180,8 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
d3d12ComputeRootSignature->samplerTextureRootIndex = -1;
d3d12ComputeRootSignature->readOnlyStorageTextureRootIndex = -1;
d3d12ComputeRootSignature->readOnlyStorageBufferRootIndex = -1;
d3d12ComputeRootSignature->writeOnlyStorageTextureRootIndex = -1;
d3d12ComputeRootSignature->writeOnlyStorageBufferRootIndex = -1;
d3d12ComputeRootSignature->readWriteStorageTextureRootIndex = -1;
d3d12ComputeRootSignature->readWriteStorageBufferRootIndex = -1;
for (Uint32 i = 0; i < MAX_UNIFORM_BUFFERS_PER_STAGE; i += 1) {
d3d12ComputeRootSignature->uniformBufferRootIndex[i] = -1;
@@ -2251,9 +2257,9 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
parameterCount += 1;
}
if (createInfo->num_writeonly_storage_textures) {
if (createInfo->num_readwrite_storage_textures) {
descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
descriptorRange.NumDescriptors = createInfo->num_writeonly_storage_textures;
descriptorRange.NumDescriptors = createInfo->num_readwrite_storage_textures;
descriptorRange.BaseShaderRegister = 0;
descriptorRange.RegisterSpace = 1;
descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
@@ -2264,15 +2270,15 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
rootParameter.DescriptorTable.pDescriptorRanges = &descriptorRanges[rangeCount];
rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // ALL is used for compute
rootParameters[parameterCount] = rootParameter;
d3d12ComputeRootSignature->writeOnlyStorageTextureRootIndex = parameterCount;
d3d12ComputeRootSignature->readWriteStorageTextureRootIndex = parameterCount;
rangeCount += 1;
parameterCount += 1;
}
if (createInfo->num_writeonly_storage_buffers) {
if (createInfo->num_readwrite_storage_buffers) {
descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
descriptorRange.NumDescriptors = createInfo->num_writeonly_storage_buffers;
descriptorRange.BaseShaderRegister = createInfo->num_writeonly_storage_textures;
descriptorRange.NumDescriptors = createInfo->num_readwrite_storage_buffers;
descriptorRange.BaseShaderRegister = createInfo->num_readwrite_storage_textures;
descriptorRange.RegisterSpace = 1;
descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
descriptorRanges[rangeCount] = descriptorRange;
@@ -2282,7 +2288,7 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
rootParameter.DescriptorTable.pDescriptorRanges = &descriptorRanges[rangeCount];
rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // ALL is used for compute
rootParameters[parameterCount] = rootParameter;
d3d12ComputeRootSignature->writeOnlyStorageBufferRootIndex = parameterCount;
d3d12ComputeRootSignature->readWriteStorageBufferRootIndex = parameterCount;
rangeCount += 1;
parameterCount += 1;
}
@@ -2409,8 +2415,8 @@ static SDL_GPUComputePipeline *D3D12_CreateComputePipeline(
computePipeline->numSamplers = createinfo->num_samplers;
computePipeline->numReadOnlyStorageTextures = createinfo->num_readonly_storage_textures;
computePipeline->numReadOnlyStorageBuffers = createinfo->num_readonly_storage_buffers;
computePipeline->numWriteOnlyStorageTextures = createinfo->num_writeonly_storage_textures;
computePipeline->numWriteOnlyStorageBuffers = createinfo->num_writeonly_storage_buffers;
computePipeline->numReadWriteStorageTextures = createinfo->num_readwrite_storage_textures;
computePipeline->numReadWriteStorageBuffers = createinfo->num_readwrite_storage_buffers;
computePipeline->numUniformBuffers = createinfo->num_uniform_buffers;
SDL_SetAtomicInt(&computePipeline->referenceCount, 0);
@@ -2792,6 +2798,9 @@ static D3D12Texture *D3D12_INTERNAL_CreateTexture(
D3D12_RESOURCE_STATES initialState = (D3D12_RESOURCE_STATES)0;
D3D12_CLEAR_VALUE clearValue;
bool useClearValue = false;
bool needsUAV =
(createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) ||
(createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE);
HRESULT res;
texture = (D3D12Texture *)SDL_calloc(1, sizeof(D3D12Texture));
@@ -2819,7 +2828,7 @@ static D3D12Texture *D3D12_INTERNAL_CreateTexture(
clearValue.DepthStencil.Stencil = (UINT8)SDL_GetNumberProperty(createinfo->props, SDL_PROP_GPU_CREATETEXTURE_D3D12_CLEAR_STENCIL_UINT8, 0);
}
if (createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) {
if (needsUAV) {
resourceFlags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
}
@@ -3025,7 +3034,7 @@ static D3D12Texture *D3D12_INTERNAL_CreateTexture(
}
// Create subresource UAV if necessary
if (createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) {
if (needsUAV) {
D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc;
D3D12_INTERNAL_AssignCpuDescriptorHandle(
@@ -4877,26 +4886,23 @@ static void D3D12_EndRenderPass(
static void D3D12_BeginComputePass(
SDL_GPUCommandBuffer *commandBuffer,
const SDL_GPUStorageTextureWriteOnlyBinding *storageTextureBindings,
const SDL_GPUStorageTextureReadWriteBinding *storageTextureBindings,
Uint32 numStorageTextureBindings,
const SDL_GPUStorageBufferWriteOnlyBinding *storageBufferBindings,
const SDL_GPUStorageBufferReadWriteBinding *storageBufferBindings,
Uint32 numStorageBufferBindings)
{
D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresourceCount = numStorageTextureBindings;
d3d12CommandBuffer->computeWriteOnlyStorageBufferCount = numStorageBufferBindings;
d3d12CommandBuffer->computeReadWriteStorageTextureSubresourceCount = numStorageTextureBindings;
d3d12CommandBuffer->computeReadWriteStorageBufferCount = numStorageBufferBindings;
/* Write-only resources will be actually bound in BindComputePipeline
/* Read-write resources will be actually bound in BindComputePipeline
* after the root signature is set.
* We also have to scan to see which barriers we actually need because depth slices aren't separate subresources
*/
if (numStorageTextureBindings > 0) {
for (Uint32 i = 0; i < numStorageTextureBindings; i += 1) {
D3D12TextureContainer *container = (D3D12TextureContainer *)storageTextureBindings[i].texture;
if (!(container->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE)) {
SDL_LogError(SDL_LOG_CATEGORY_GPU, "Attempted to bind read-only texture as compute write texture");
}
D3D12TextureSubresource *subresource = D3D12_INTERNAL_PrepareTextureSubresourceForWrite(
d3d12CommandBuffer,
@@ -4906,7 +4912,7 @@ static void D3D12_BeginComputePass(
storageTextureBindings[i].cycle,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresources[i] = subresource;
d3d12CommandBuffer->computeReadWriteStorageTextureSubresources[i] = subresource;
D3D12_INTERNAL_TrackTexture(
d3d12CommandBuffer,
@@ -4917,16 +4923,14 @@ static void D3D12_BeginComputePass(
if (numStorageBufferBindings > 0) {
for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) {
D3D12BufferContainer *container = (D3D12BufferContainer *)storageBufferBindings[i].buffer;
if (!(container->usage & SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_WRITE)) {
SDL_LogError(SDL_LOG_CATEGORY_GPU, "Attempted to bind read-only texture as compute write texture");
}
D3D12Buffer *buffer = D3D12_INTERNAL_PrepareBufferForWrite(
d3d12CommandBuffer,
container,
storageBufferBindings[i].cycle,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
d3d12CommandBuffer->computeWriteOnlyStorageBuffers[i] = buffer;
d3d12CommandBuffer->computeReadWriteStorageBuffers[i] = buffer;
D3D12_INTERNAL_TrackBuffer(
d3d12CommandBuffer,
@@ -4972,39 +4976,39 @@ static void D3D12_BindComputePipeline(
D3D12_INTERNAL_TrackComputePipeline(d3d12CommandBuffer, pipeline);
// Bind read-write resources after setting root signature
if (pipeline->numWriteOnlyStorageTextures > 0) {
for (Uint32 i = 0; i < pipeline->numWriteOnlyStorageTextures; i += 1) {
cpuHandles[i] = d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]->uavHandle.cpuHandle;
if (pipeline->numReadWriteStorageTextures > 0) {
for (Uint32 i = 0; i < pipeline->numReadWriteStorageTextures; i += 1) {
cpuHandles[i] = d3d12CommandBuffer->computeReadWriteStorageTextureSubresources[i]->uavHandle.cpuHandle;
}
D3D12_INTERNAL_WriteGPUDescriptors(
d3d12CommandBuffer,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
cpuHandles,
d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresourceCount,
d3d12CommandBuffer->computeReadWriteStorageTextureSubresourceCount,
&gpuDescriptorHandle);
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(
d3d12CommandBuffer->graphicsCommandList,
d3d12CommandBuffer->currentComputePipeline->rootSignature->writeOnlyStorageTextureRootIndex,
d3d12CommandBuffer->currentComputePipeline->rootSignature->readWriteStorageTextureRootIndex,
gpuDescriptorHandle);
}
if (pipeline->numWriteOnlyStorageBuffers > 0) {
for (Uint32 i = 0; i < pipeline->numWriteOnlyStorageBuffers; i += 1) {
cpuHandles[i] = d3d12CommandBuffer->computeWriteOnlyStorageBuffers[i]->uavDescriptor.cpuHandle;
if (pipeline->numReadWriteStorageBuffers > 0) {
for (Uint32 i = 0; i < pipeline->numReadWriteStorageBuffers; i += 1) {
cpuHandles[i] = d3d12CommandBuffer->computeReadWriteStorageBuffers[i]->uavDescriptor.cpuHandle;
}
D3D12_INTERNAL_WriteGPUDescriptors(
d3d12CommandBuffer,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
cpuHandles,
d3d12CommandBuffer->computeWriteOnlyStorageBufferCount,
d3d12CommandBuffer->computeReadWriteStorageBufferCount,
&gpuDescriptorHandle);
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(
d3d12CommandBuffer->graphicsCommandList,
d3d12CommandBuffer->currentComputePipeline->rootSignature->writeOnlyStorageBufferRootIndex,
d3d12CommandBuffer->currentComputePipeline->rootSignature->readWriteStorageBufferRootIndex,
gpuDescriptorHandle);
}
}
@@ -5257,29 +5261,29 @@ static void D3D12_EndComputePass(
{
D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
for (Uint32 i = 0; i < d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresourceCount; i += 1) {
if (d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]) {
for (Uint32 i = 0; i < d3d12CommandBuffer->computeReadWriteStorageTextureSubresourceCount; i += 1) {
if (d3d12CommandBuffer->computeReadWriteStorageTextureSubresources[i]) {
D3D12_INTERNAL_TextureSubresourceTransitionToDefaultUsage(
d3d12CommandBuffer,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]);
d3d12CommandBuffer->computeReadWriteStorageTextureSubresources[i]);
d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresources[i] = NULL;
d3d12CommandBuffer->computeReadWriteStorageTextureSubresources[i] = NULL;
}
}
d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresourceCount = 0;
d3d12CommandBuffer->computeReadWriteStorageTextureSubresourceCount = 0;
for (Uint32 i = 0; i < d3d12CommandBuffer->computeWriteOnlyStorageBufferCount; i += 1) {
if (d3d12CommandBuffer->computeWriteOnlyStorageBuffers[i]) {
for (Uint32 i = 0; i < d3d12CommandBuffer->computeReadWriteStorageBufferCount; i += 1) {
if (d3d12CommandBuffer->computeReadWriteStorageBuffers[i]) {
D3D12_INTERNAL_BufferTransitionToDefaultUsage(
d3d12CommandBuffer,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
d3d12CommandBuffer->computeWriteOnlyStorageBuffers[i]);
d3d12CommandBuffer->computeReadWriteStorageBuffers[i]);
d3d12CommandBuffer->computeWriteOnlyStorageBuffers[i] = NULL;
d3d12CommandBuffer->computeReadWriteStorageBuffers[i] = NULL;
}
}
d3d12CommandBuffer->computeWriteOnlyStorageBufferCount = 0;
d3d12CommandBuffer->computeReadWriteStorageBufferCount = 0;
for (Uint32 i = 0; i < MAX_STORAGE_TEXTURES_PER_STAGE; i += 1) {
if (d3d12CommandBuffer->computeReadOnlyStorageTextures[i]) {
@@ -6881,8 +6885,8 @@ static SDL_GPUCommandBuffer *D3D12_AcquireCommandBuffer(
SDL_zeroa(commandBuffer->computeSamplers);
SDL_zeroa(commandBuffer->computeReadOnlyStorageTextures);
SDL_zeroa(commandBuffer->computeReadOnlyStorageBuffers);
SDL_zeroa(commandBuffer->computeWriteOnlyStorageTextureSubresources);
SDL_zeroa(commandBuffer->computeWriteOnlyStorageBuffers);
SDL_zeroa(commandBuffer->computeReadWriteStorageTextureSubresources);
SDL_zeroa(commandBuffer->computeReadWriteStorageBuffers);
SDL_zeroa(commandBuffer->computeUniformBuffers);
commandBuffer->autoReleaseFence = true;
@@ -7555,6 +7559,9 @@ static bool D3D12_SupportsTextureFormat(
if ((usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) && !(formatSupport.Support2 & D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE)) {
return false;
}
if ((usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE) && !(formatSupport.Support2 & D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD)) {
return false;
}
if ((usage & SDL_GPU_TEXTUREUSAGE_COLOR_TARGET) && !(formatSupport.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET)) {
return false;
}