From f18ce27edfb95b6f21ba48fe9778bc54643f51bd Mon Sep 17 00:00:00 2001 From: Ruslan Kutdusov Date: Sat, 19 Apr 2025 23:29:35 +0100 Subject: [PATCH 1/6] retarget solution to v143, enable debug info for shaders --- perftest/perftest.vcxproj | 605 +++++--------------------------------- 1 file changed, 79 insertions(+), 526 deletions(-) diff --git a/perftest/perftest.vcxproj b/perftest/perftest.vcxproj index 58b3164..5b666fc 100644 --- a/perftest/perftest.vcxproj +++ b/perftest/perftest.vcxproj @@ -27,26 +27,26 @@ Application true - v142 + v143 MultiByte Application false - v142 + v143 true MultiByte Application true - v142 + v143 MultiByte Application false - v142 + v143 true MultiByte @@ -75,6 +75,12 @@ Disabled true + + true + Compute + 5.0 + $(ProjectDir)\shaders\%(Filename).cso + @@ -85,6 +91,12 @@ DXGI.lib;d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + true + Compute + 5.0 + $(ProjectDir)\shaders\%(Filename).cso + @@ -98,6 +110,12 @@ true true + + true + Compute + 5.0 + $(ProjectDir)\shaders\%(Filename).cso + @@ -112,6 +130,12 @@ true DXGI.lib;d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + true + Compute + 5.0 + $(ProjectDir)\shaders\%(Filename).cso + @@ -129,528 +153,57 @@ - - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - 
false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - 
Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - - - Compute - 5.0 - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - false - false - - - Compute - 5.0 - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - false - false - - - Compute - 5.0 - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - false - false - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - Compute - 5.0 - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - false - false - - - Compute - 5.0 - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - false - false - - - Compute - 5.0 - false - false - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - false - false - - - false - false - Compute - 5.0 - Compute - 5.0 - 
$(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - false - false - Compute - 5.0 - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - $(ProjectDir)\shaders\%(Filename).cso - - - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - false - false - $(ProjectDir)\shaders\%(Filename).cso - - - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - false - false - $(ProjectDir)\shaders\%(Filename).cso - - - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - false - false - $(ProjectDir)\shaders\%(Filename).cso - - - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - false - false - $(ProjectDir)\shaders\%(Filename).cso - - - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - false - false - $(ProjectDir)\shaders\%(Filename).cso - - - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - false - false - $(ProjectDir)\shaders\%(Filename).cso - - - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 
5.0 - false - false - $(ProjectDir)\shaders\%(Filename).cso - - - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - false - false - $(ProjectDir)\shaders\%(Filename).cso - - - Compute - 5.0 - $(ProjectDir)\shaders\%(Filename).cso - Compute - 5.0 - false - false - $(ProjectDir)\shaders\%(Filename).cso - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From d8a0b5f273535186bf3970680a6d17b5ec3d56b5 Mon Sep 17 00:00:00 2001 From: Ruslan Kutdusov Date: Sun, 20 Apr 2025 16:36:22 +0100 Subject: [PATCH 2/6] average and std dev --- perftest/main.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/perftest/main.cpp b/perftest/main.cpp index c712553..1f9dd32 100644 --- a/perftest/main.cpp +++ b/perftest/main.cpp @@ -3,6 +3,7 @@ #include "graphicsUtil.h" #include "loadConstantsGPU.h" #include +#include class BenchTest { @@ -207,7 +208,8 @@ int main(int argc, char *argv[]) struct TestCaseTiming { std::string name; - float totalTime; + float totalTime = 0.0f; + std::vector timings; }; std::array timingResults; @@ -226,6 +228,7 @@ int main(int argc, char *argv[]) timingResults[id] = { name, 0 }; } timingResults[id].totalTime += timeMillis; + timingResults[id].timings.push_back(timeMillis); } }); @@ -418,7 +421,18 @@ int main(int argc, char *argv[]) for (auto&& row : timingResults) { if (row.name == "") break; - printf("%s: %.3fms %.3fx\n", row.name.c_str(), row.totalTime, compareToTime / row.totalTime); + float average = row.totalTime / row.timings.size(); + float stdDev = 0.0f; + for (float t : row.timings) + stdDev += std::powf(t - average, 2.0f); + stdDev = std::sqrtf(stdDev / row.timings.size()); + printf( + "%s: %.3fms %.3fms %.3fms %.3fx\n", + row.name.c_str(), + row.totalTime, + average, + stdDev, + compareToTime / row.totalTime); } return 0; From 35deca2a9b55e016c1937fabf2c941ebe3bc0cf5 Mon Sep 17 00:00:00 2001 From: Ruslan Kutdusov Date: Sun, 20 Apr 2025 
17:18:28 +0100 Subject: [PATCH 3/6] annotate shaders and markers --- perftest/directx.cpp | 17 ++++++++++++++++- perftest/directx.h | 5 +++-- perftest/graphicsUtil.h | 2 +- perftest/perftest.vcxproj | 4 ++-- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/perftest/directx.cpp b/perftest/directx.cpp index a915d1d..efaf141 100644 --- a/perftest/directx.cpp +++ b/perftest/directx.cpp @@ -63,6 +63,8 @@ DirectXDevice::DirectXDevice(HWND window, uint2 resolution, IDXGIAdapter* adapte assert(SUCCEEDED(result)); + deviceContext->QueryInterface(IID_PPV_ARGS(&userDefinedAnnotation)); + D3D11_VIEWPORT viewport; viewport.Height = (float)resolution.y; viewport.Width = (float)resolution.x; @@ -347,11 +349,12 @@ ID3D11SamplerState* DirectXDevice::createSampler(SamplerType type) return sampler; } -ID3D11ComputeShader* DirectXDevice::createComputeShader(const std::vector &shaderBytes) +ID3D11ComputeShader* DirectXDevice::createComputeShader(const std::string& name, const std::vector &shaderBytes) { ID3D11ComputeShader* shader = nullptr; HRESULT result = device->CreateComputeShader(shaderBytes.data(), shaderBytes.size(), nullptr, &shader); assert(SUCCEEDED(result)); + shader->SetPrivateData(WKPDID_D3DDebugObjectName, (UINT)name.length(), name.c_str()); return shader; } @@ -465,6 +468,15 @@ void DirectXDevice::clearUAV(ID3D11UnorderedAccessView* uav, std::array wname; + wname.resize(name.length() + 1); + size_t wnameLen = 0; + mbstowcs_s(&wnameLen, wname.data(), wname.size(), name.c_str(), name.length()); + userDefinedAnnotation->BeginEvent((const wchar_t*)wname.data()); + } + PerformanceQuery& query = queries[queryCounter % queries.size()]; query.id = id; @@ -483,6 +495,9 @@ void DirectXDevice::endPerformanceQuery(QueryHandle queryHandle) deviceContext->End(query.end); // NOTE: timestamp queries don't use Begin(), only End() deviceContext->End(query.disjoint); + + if (userDefinedAnnotation) + userDefinedAnnotation->EndEvent(); } void 
DirectXDevice::processPerformanceResults(const std::function& functor) diff --git a/perftest/directx.h b/perftest/directx.h index 0b76363..0536702 100644 --- a/perftest/directx.h +++ b/perftest/directx.h @@ -2,7 +2,7 @@ #include "datatypes.h" #include "com_ptr.h" #include -#include +#include #include #include #include @@ -49,7 +49,7 @@ class DirectXDevice ID3D11UnorderedAccessView* createBackBufferUAV(); ID3D11DepthStencilView* createDepthStencilView(uint2 size); ID3D11RenderTargetView* DirectXDevice::createBackBufferRTV(); - ID3D11ComputeShader* createComputeShader(const std::vector& shaderBytes); + ID3D11ComputeShader* createComputeShader(const std::string& name, const std::vector& shaderBytes); ID3D11Buffer* createConstantBuffer(unsigned bytes); ID3D11Buffer* createBuffer(unsigned numElements, unsigned strideBytes, BufferType type = BufferType::Default); @@ -109,6 +109,7 @@ class DirectXDevice com_ptr swapChain; com_ptr device; com_ptr deviceContext; + com_ptr userDefinedAnnotation; // Queries std::array queries; diff --git a/perftest/graphicsUtil.h b/perftest/graphicsUtil.h index 83b299d..470dbba 100644 --- a/perftest/graphicsUtil.h +++ b/perftest/graphicsUtil.h @@ -5,6 +5,6 @@ inline ID3D11ComputeShader *loadComputeShader(DirectXDevice &dx, const std::string &filename) { auto shaderBlob = loadFile(filename); - return dx.createComputeShader(shaderBlob); + return dx.createComputeShader(filename, shaderBlob); } diff --git a/perftest/perftest.vcxproj b/perftest/perftest.vcxproj index 5b666fc..008d384 100644 --- a/perftest/perftest.vcxproj +++ b/perftest/perftest.vcxproj @@ -89,7 +89,7 @@ true - DXGI.lib;d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 
DXGI.lib;d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;dxguid.lib;%(AdditionalDependencies) true @@ -128,7 +128,7 @@ true true - DXGI.lib;d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + DXGI.lib;d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;dxguid.lib;%(AdditionalDependencies) true From 9d28a13e315385456a21a8eb8a6c56a993af8549 Mon Sep 17 00:00:00 2001 From: Ruslan Kutdusov Date: Sat, 19 Apr 2025 23:23:13 +0100 Subject: [PATCH 4/6] moving to dx12 and dxc --- perftest/compile_shaders_5_1.bat | 7 + perftest/compile_shaders_6_0.bat | 7 + perftest/directx.cpp | 964 ++++++++++++++++++------------ perftest/directx.h | 186 ++++-- perftest/graphicsUtil.h | 2 +- perftest/loadConstantBody.hlsli | 6 + perftest/loadRawBody.hlsli | 7 + perftest/loadStructuredBody.hlsli | 7 + perftest/loadTexBody.hlsli | 7 + perftest/loadTypedBody.hlsli | 7 + perftest/main.cpp | 533 +++++++++-------- perftest/perftest.vcxproj | 28 +- perftest/perftest.vcxproj.filters | 1 + perftest/sampleTexBody.hlsli | 8 + 14 files changed, 1058 insertions(+), 712 deletions(-) create mode 100644 perftest/compile_shaders_5_1.bat create mode 100644 perftest/compile_shaders_6_0.bat diff --git a/perftest/compile_shaders_5_1.bat b/perftest/compile_shaders_5_1.bat new file mode 100644 index 0000000..78688f6 --- /dev/null +++ b/perftest/compile_shaders_5_1.bat @@ -0,0 +1,7 @@ +@echo off +setlocal enabledelayedexpansion + +for %%f in (*.hlsl) do ( + echo Compiling %%f... 
+ fxc /T cs_5_1 /E main /Zi /Fo shaders\%%~nf.cso %%f +) diff --git a/perftest/compile_shaders_6_0.bat b/perftest/compile_shaders_6_0.bat new file mode 100644 index 0000000..593b921 --- /dev/null +++ b/perftest/compile_shaders_6_0.bat @@ -0,0 +1,7 @@ +@echo off +setlocal enabledelayedexpansion + +for %%f in (*.hlsl) do ( + echo Compiling %%f... + dxc /T cs_6_0 /E main /Zi /Fo shaders\%%~nf.cso /Fd shaders\ %%f +) diff --git a/perftest/directx.cpp b/perftest/directx.cpp index efaf141..8f6fd69 100644 --- a/perftest/directx.cpp +++ b/perftest/directx.cpp @@ -1,18 +1,163 @@ #include "directx.h" #include +#define USE_PIX 1 +#include -std::vector> enumerateAdapters() +ComPtr GDXGIFactory; + +static D3D12_RESOURCE_DESC InitBufferResourceDesc(size_t sizeInBytes) { - std::vector> adapters; + return + { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Alignment = 0, + .Width = sizeInBytes, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DXGI_FORMAT_UNKNOWN, + .SampleDesc = + { + .Count = 1, + .Quality = 0, + }, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags = D3D12_RESOURCE_FLAG_NONE + }; +} - com_ptr factory; - if (FAILED(CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)&factory))) +static UINT Align(UINT value, UINT alignment) +{ + UINT mask = alignment - 1; + return (value + mask) & ~mask; +} + +ComputePSO::ComputePSO(ID3D12Device* device, const std::string& name, const std::vector& shaderBytes) +{ + D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = { - return adapters; + .CS = + { + .pShaderBytecode = shaderBytes.data(), + .BytecodeLength = shaderBytes.size() + } + }; + + HRESULT result = device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(pso.GetAddressOf())); + assert(SUCCEEDED(result)); + + std::vector wname; + wname.resize(name.length() + 1); + size_t wnameLen = 0; + mbstowcs_s(&wnameLen, wname.data(), wname.size(), name.c_str(), name.length()); + pso->SetName(wname.data()); + + ComPtr deserializer; + result = 
D3D12CreateRootSignatureDeserializer( + shaderBytes.data(), + shaderBytes.size(), + IID_PPV_ARGS(deserializer.GetAddressOf())); + assert(SUCCEEDED(result)); + + auto dxRootSigDesc = deserializer->GetRootSignatureDesc(); + for (uint32_t rootParamIdx = 0; rootParamIdx < dxRootSigDesc->NumParameters; rootParamIdx++) + { + Binding binding = { .rootParamIdx = rootParamIdx }; + const D3D12_ROOT_PARAMETER& rootParam = dxRootSigDesc->pParameters[rootParamIdx]; + if (rootParam.ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) + { + binding.descriptorOffset = 0; + bool isSamplerDescriptorTable = false; + for (uint32_t rangeIdx = 0; rangeIdx < rootParam.DescriptorTable.NumDescriptorRanges; rangeIdx++) + { + auto& range = rootParam.DescriptorTable.pDescriptorRanges[rangeIdx]; + if (range.OffsetInDescriptorsFromTableStart != D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) + binding.descriptorOffset = range.OffsetInDescriptorsFromTableStart; + + EBindingType bindingType = {}; + if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_SRV) + bindingType = EBindingType::kSrv; + else if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_CBV) + bindingType = EBindingType::kCbv; + else if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_UAV) + bindingType = EBindingType::kUav; + else if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER) + { + bindingType = EBindingType::kSampler; + isSamplerDescriptorTable = true; + } + + binding.isRootDescriptor = false; + for (uint32_t descriptorIdx = 0; descriptorIdx < range.NumDescriptors; descriptorIdx++) + { + uint32_t reg = range.BaseShaderRegister + descriptorIdx; + bindings[(int)bindingType][reg] = binding; + binding.descriptorOffset++; + } + } + + RootParameter ourRootParam = { + .type = rootParam.ParameterType, + .numDescriptors = binding.descriptorOffset, + .isSamplerDescriptorTable = isSamplerDescriptorTable + }; + rootSignatureDesc.push_back(ourRootParam); + } + else + { + EBindingType bindingType = {}; + if (rootParam.ParameterType == 
D3D12_ROOT_PARAMETER_TYPE_SRV) + bindingType = EBindingType::kSrv; + else if (rootParam.ParameterType == D3D12_ROOT_PARAMETER_TYPE_CBV) + bindingType = EBindingType::kCbv; + else if (rootParam.ParameterType == D3D12_ROOT_PARAMETER_TYPE_UAV) + bindingType = EBindingType::kUav; + else + assert(false); + + binding.isRootDescriptor = true; + bindings[(int)bindingType][rootParam.Descriptor.ShaderRegister] = binding; + + RootParameter ourRootParam = { + .type = rootParam.ParameterType, + .numDescriptors = 1 + }; + rootSignatureDesc.push_back(ourRootParam); + } } - IDXGIAdapter* adapter; - for (UINT i = 0; factory->EnumAdapters(i, &adapter) != DXGI_ERROR_NOT_FOUND; ++i) + result = device->CreateRootSignature( + 0, + shaderBytes.data(), + shaderBytes.size(), + IID_PPV_ARGS(rootSig.GetAddressOf())); + assert(SUCCEEDED(result)); +} + +const ComputePSO::Binding* ComputePSO::getBinding(uint32_t slot, EBindingType type) const +{ + auto& map = bindings[(int)type]; + auto iterator = map.find(slot); + if (iterator == map.end()) + return nullptr; + return &iterator->second; +} + +std::vector> enumerateAdapters() +{ + std::vector> adapters; + +#if _DEBUG + UINT dxgiFactoryFlag = DXGI_CREATE_FACTORY_DEBUG; +#else + UINT dxgiFactoryFlag = 0; +#endif + + HRESULT hr = CreateDXGIFactory2(dxgiFactoryFlag, IID_PPV_ARGS(GDXGIFactory.GetAddressOf())); + assert(SUCCEEDED(hr)); + + ComPtr adapter; + for (UINT i = 0; GDXGIFactory->EnumAdapters(i, adapter.GetAddressOf()) != DXGI_ERROR_NOT_FOUND; ++i) { adapters.push_back(adapter); } @@ -23,510 +168,555 @@ std::vector> enumerateAdapters() DirectXDevice::DirectXDevice(HWND window, uint2 resolution, IDXGIAdapter* adapter) : windowHandle(window), resolution(resolution) - { -#ifdef _DEBUG - UINT flags = D3D11_CREATE_DEVICE_DEBUG; -#else - UINT flags = 0; +#if _DEBUG + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(debugInterface.GetAddressOf())))) + { + debugInterface->EnableDebugLayer(); + //debugInterface->SetEnableGPUBasedValidation(true); + } 
#endif - D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0; - - DXGI_SWAP_CHAIN_DESC swapDesc; - ZeroMemory(&swapDesc, sizeof(DXGI_SWAP_CHAIN_DESC)); - swapDesc.BufferDesc.Width = resolution.x; - swapDesc.BufferDesc.Height = resolution.y; - swapDesc.BufferDesc.RefreshRate.Numerator = 60; - swapDesc.BufferDesc.RefreshRate.Denominator = 1; - swapDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - swapDesc.SampleDesc.Count = 1; - swapDesc.BufferUsage = DXGI_USAGE_UNORDERED_ACCESS | DXGI_USAGE_RENDER_TARGET_OUTPUT; // RT needed for GDI text output - swapDesc.BufferCount = 1; - swapDesc.OutputWindow = window; - swapDesc.Windowed = true; - swapDesc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; - swapDesc.Flags = 0; - - HRESULT result = D3D11CreateDeviceAndSwapChain( - adapter, - D3D_DRIVER_TYPE_UNKNOWN, - nullptr, // software rasterizer - flags, - &featureLevel, - 1, // num feature levels - D3D11_SDK_VERSION, // sdk version - &swapDesc, - &swapChain, - &device, - nullptr, // selected feature level - &deviceContext); - assert(SUCCEEDED(result)); - - deviceContext->QueryInterface(IID_PPV_ARGS(&userDefinedAnnotation)); + D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_12_0; - D3D11_VIEWPORT viewport; - viewport.Height = (float)resolution.y; - viewport.Width = (float)resolution.x; - viewport.MaxDepth = 1.0f; - viewport.MinDepth = 0.0f; - viewport.TopLeftX = 0.0f; - viewport.TopLeftY = 0.0f; - deviceContext->RSSetViewports(1, &viewport); + HRESULT result = D3D12CreateDevice(adapter, featureLevel, IID_PPV_ARGS(device.GetAddressOf())); + assert(SUCCEEDED(result)); - // Queries - for (auto &&q : queries) +#if _DEBUG + ComPtr pInfoQueue; + if (SUCCEEDED(device->QueryInterface(IID_PPV_ARGS(pInfoQueue.GetAddressOf())))) { - D3D11_QUERY_DESC desc; - ZeroMemory(&desc, sizeof(desc)); - desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; - device->CreateQuery(&desc, &q.disjoint); - desc.Query = D3D11_QUERY_TIMESTAMP; - device->CreateQuery(&desc, &q.start); - device->CreateQuery(&desc, 
&q.end); + pInfoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, true); + pInfoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, true); } -} +#endif -ID3D11UnorderedAccessView* DirectXDevice::createBackBufferUAV() -{ - ID3D11Texture2D* backBuffer = nullptr; - HRESULT result = swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID*)&backBuffer); + D3D12_COMMAND_QUEUE_DESC queueDesc = + { + .Type = D3D12_COMMAND_LIST_TYPE_DIRECT, + .NodeMask = 1 + }; + result = device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(cmdQueue.GetAddressOf())); assert(SUCCEEDED(result)); - ID3D11UnorderedAccessView *view = nullptr; - result = device->CreateUnorderedAccessView(backBuffer, nullptr, &view); + fenceLastSignalVal = 0; + result = device->CreateFence(fenceLastSignalVal, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf())); assert(SUCCEEDED(result)); - backBuffer->Release(); - return view; -} + fenceEvent = CreateEvent(nullptr, false, false, nullptr); + assert(fenceEvent); -ID3D11DepthStencilView* DirectXDevice::createDepthStencilView(uint2 size) -{ - D3D11_TEXTURE2D_DESC texDesc; - texDesc.ArraySize = 1; - texDesc.BindFlags = D3D11_BIND_DEPTH_STENCIL; - texDesc.CPUAccessFlags = 0; - texDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; - texDesc.Width = (UINT)size.x; - texDesc.Height = (UINT)size.y; - texDesc.MipLevels = 1; - texDesc.MiscFlags = 0; - texDesc.SampleDesc.Count = 1; - texDesc.SampleDesc.Quality = 0; - texDesc.Usage = D3D11_USAGE_DEFAULT; - - HRESULT result; - - ID3D11Texture2D* depthStencil = nullptr; - result = device->CreateTexture2D(&texDesc, NULL, &depthStencil); + result = device->CreateCommandAllocator(queueDesc.Type, IID_PPV_ARGS(cmdAllocator.GetAddressOf())); + assert(SUCCEEDED(result)); + + result = device->CreateCommandList( + 0, + queueDesc.Type, + cmdAllocator.Get(), + nullptr, + IID_PPV_ARGS(cmdList.GetAddressOf())); + assert(SUCCEEDED(result)); + cmdList->Close(); + + DXGI_SWAP_CHAIN_DESC1 swapDesc = { + .Width = resolution.x, + 
.Height = resolution.y, + .Format = DXGI_FORMAT_R8G8B8A8_UNORM, + .SampleDesc = { + .Count = 1, + .Quality = 0}, + .BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT, // RT needed for GDI text output + .BufferCount = 2, + .Scaling = DXGI_SCALING_NONE, + .SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL, + .Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH }; + + result = GDXGIFactory->CreateSwapChainForHwnd( + cmdQueue.Get(), + window, + &swapDesc, + nullptr, + nullptr, + swapChain.GetAddressOf()); assert(SUCCEEDED(result)); - D3D11_DEPTH_STENCIL_VIEW_DESC depthViewDesc; - depthViewDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; - depthViewDesc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D; - depthViewDesc.Flags = 0; - depthViewDesc.Texture2D.MipSlice = 0; + D3D12_QUERY_HEAP_DESC queryHeapDesc = { + .Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP, + .Count = (UINT)queries.size() * 2}; + result = device->CreateQueryHeap(&queryHeapDesc, IID_PPV_ARGS(queryHeap.GetAddressOf())); + assert(SUCCEEDED(result)); - ID3D11DepthStencilView *depthStencilView = nullptr; - result = device->CreateDepthStencilView(depthStencil, &depthViewDesc, &depthStencilView); + D3D12_RESOURCE_DESC resourceDesc = InitBufferResourceDesc(queryHeapDesc.Count * sizeof(uint64_t)); + D3D12_HEAP_PROPERTIES heapProps = { .Type = D3D12_HEAP_TYPE_READBACK }; + result = device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(queryResultBuffer.GetAddressOf())); assert(SUCCEEDED(result)); - depthStencil->Release(); - return depthStencilView; + D3D12_DESCRIPTOR_HEAP_DESC heapDesc = { + .Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + .NumDescriptors = 100'000, + .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE}; + result = device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(cbvSrvUavDescriptorHeap.GetAddressOf())); + + heapDesc = { + .Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + .NumDescriptors = 1'000, + .Flags = 
D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE }; + result = device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(samplerDescriptorHeap.GetAddressOf())); } -ID3D11RenderTargetView* DirectXDevice::createBackBufferRTV() +DirectXDevice::~DirectXDevice() { - ID3D11Texture2D* backBuffer = nullptr; - HRESULT result = swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID*)&backBuffer); - assert(SUCCEEDED(result)); - - ID3D11RenderTargetView *view = nullptr; - result = device->CreateRenderTargetView(backBuffer, nullptr, &view); - assert(SUCCEEDED(result)); + cmdQueue->Signal(fence.Get(), ++fenceLastSignalVal); + HRESULT hr = fence->SetEventOnCompletion(fenceLastSignalVal, fenceEvent); + assert(SUCCEEDED(hr)); + WaitForSingleObject(fenceEvent, INFINITE); - backBuffer->Release(); - return view; + CloseHandle(fenceEvent); } -ID3D11Buffer* DirectXDevice::createConstantBuffer(unsigned bytes) +ComPtr DirectXDevice::createConstantBuffer(unsigned bytes) { - D3D11_BUFFER_DESC desc; - desc.ByteWidth = bytes; - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - desc.MiscFlags = 0; - desc.StructureByteStride = 0; - - ID3D11Buffer *buffer = nullptr; - HRESULT result = device->CreateBuffer(&desc, nullptr, &buffer); + auto resourceDesc = InitBufferResourceDesc(Align(bytes, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)); + D3D12_HEAP_PROPERTIES heapProps = { .Type = D3D12_HEAP_TYPE_UPLOAD }; + ComPtr resource; + HRESULT result = device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(resource.GetAddressOf())); assert(SUCCEEDED(result)); - return buffer; + return resource; } -ID3D11Buffer* DirectXDevice::createBuffer(unsigned numElements, unsigned strideBytes, BufferType type) +ComPtr DirectXDevice::createBuffer(unsigned numElements, unsigned strideBytes) { - D3D11_BUFFER_DESC desc; - desc.ByteWidth = strideBytes * 
numElements; - desc.StructureByteStride = (type == BufferType::Structured) ? strideBytes : 0; - desc.Usage = D3D11_USAGE_DEFAULT; - desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; - desc.CPUAccessFlags = 0; - desc.MiscFlags = 0; - - if (type == BufferType::Structured) - desc.MiscFlags |= D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; - - if (type == BufferType::ByteAddress) - desc.MiscFlags |= D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; - - ID3D11Buffer *buffer = nullptr; - HRESULT result = device->CreateBuffer(&desc, nullptr, &buffer); + auto resourceDesc = InitBufferResourceDesc(strideBytes * numElements); + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + D3D12_HEAP_PROPERTIES heapProps = { .Type = D3D12_HEAP_TYPE_DEFAULT }; + ComPtr resource; + HRESULT result = device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_COMMON, + nullptr, + IID_PPV_ARGS(resource.GetAddressOf())); assert(SUCCEEDED(result)); - return buffer; + return resource; } -ID3D11Texture2D* DirectXDevice::createTexture2d(uint2 dimensions, DXGI_FORMAT format, unsigned mips) +ComPtr DirectXDevice::createTexture2d(uint2 dimensions, DXGI_FORMAT format, unsigned mips) { - D3D11_TEXTURE2D_DESC desc; - desc.Width = dimensions.x; - desc.Height = dimensions.y; - desc.ArraySize = 1; - desc.SampleDesc.Count = 1; - desc.SampleDesc.Quality = 0; - desc.MipLevels = mips; - desc.Format = format; - desc.Usage = D3D11_USAGE_DEFAULT; - desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; - desc.CPUAccessFlags = 0; - desc.MiscFlags = 0; - - ID3D11Texture2D *texture = nullptr; - HRESULT result = device->CreateTexture2D(&desc, nullptr, &texture); + D3D12_RESOURCE_DESC textureDesc = { + .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D, + .Alignment = 0, + .Width = dimensions.x, + .Height = dimensions.y, + .DepthOrArraySize = 1, + .MipLevels = (UINT16)mips, + .Format = format, + .SampleDesc = + { + .Count 
= 1, + .Quality = 0, + }, + .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS}; + + D3D12_HEAP_PROPERTIES heapProps = { .Type = D3D12_HEAP_TYPE_DEFAULT }; + ComPtr resource; + HRESULT result = device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &textureDesc, + D3D12_RESOURCE_STATE_COMMON, + nullptr, + IID_PPV_ARGS(resource.GetAddressOf())); assert(SUCCEEDED(result)); - return texture; + return resource; } -ID3D11Texture3D* DirectXDevice::createTexture3d(uint3 dimensions, DXGI_FORMAT format, unsigned mips) +ComPtr DirectXDevice::createTexture3d(uint3 dimensions, DXGI_FORMAT format, unsigned mips) { - D3D11_TEXTURE3D_DESC desc; - desc.Width = dimensions.x; - desc.Height = dimensions.y; - desc.Depth = dimensions.z; - desc.MipLevels = mips; - desc.Format = format; - desc.Usage = D3D11_USAGE_DEFAULT; - desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; - desc.CPUAccessFlags = 0; - desc.MiscFlags = 0; - - ID3D11Texture3D *texture = nullptr; - HRESULT result = device->CreateTexture3D(&desc, nullptr, &texture); + D3D12_RESOURCE_DESC textureDesc = { + .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D, + .Alignment = 0, + .Width = dimensions.x, + .Height = dimensions.y, + .DepthOrArraySize = (UINT16)dimensions.z, + .MipLevels = (UINT16)mips, + .Format = format, + .SampleDesc = + { + .Count = 1, + .Quality = 0, + }, + .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS}; + + D3D12_HEAP_PROPERTIES heapProps = { .Type = D3D12_HEAP_TYPE_DEFAULT }; + ComPtr resource; + HRESULT result = device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &textureDesc, + D3D12_RESOURCE_STATE_COMMON, + nullptr, + IID_PPV_ARGS(resource.GetAddressOf())); assert(SUCCEEDED(result)); - return texture; + return resource; } -ID3D11UnorderedAccessView* DirectXDevice::createUAV(ID3D11Resource *buffer) +UnorderedAccessView DirectXDevice::createUAV(ID3D12Resource 
*buffer) { - ID3D11UnorderedAccessView *view = nullptr; - HRESULT result = device->CreateUnorderedAccessView(buffer, nullptr, &view); - assert(SUCCEEDED(result)); - return view; + return UnorderedAccessView(buffer, {}); } -ID3D11UnorderedAccessView* DirectXDevice::createByteAddressUAV(ID3D11Resource *buffer, unsigned numElements) +UnorderedAccessView DirectXDevice::createByteAddressUAV(ID3D12Resource *buffer, unsigned numElements) { - D3D11_UNORDERED_ACCESS_VIEW_DESC desc; - desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; - desc.Format = DXGI_FORMAT_R32_TYPELESS; - desc.Buffer.FirstElement = 0; - desc.Buffer.NumElements = numElements; - desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW; - - ID3D11UnorderedAccessView *view = nullptr; - HRESULT result = device->CreateUnorderedAccessView(buffer, &desc, &view); - assert(SUCCEEDED(result)); - return view; + D3D12_UNORDERED_ACCESS_VIEW_DESC desc = { + .Format = DXGI_FORMAT_R32_TYPELESS, + .ViewDimension = D3D12_UAV_DIMENSION_BUFFER, + .Buffer = { + .FirstElement = 0, + .NumElements = numElements, + .Flags = D3D12_BUFFER_UAV_FLAG_RAW}}; + + return UnorderedAccessView(buffer, desc); } -ID3D11UnorderedAccessView* DirectXDevice::createTypedUAV(ID3D11Resource *buffer, unsigned numElements, DXGI_FORMAT format) +UnorderedAccessView DirectXDevice::createTypedUAV(ID3D12Resource *buffer, unsigned numElements, DXGI_FORMAT format) { - D3D11_UNORDERED_ACCESS_VIEW_DESC desc; - desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; - desc.Format = format; - desc.Buffer.FirstElement = 0; - desc.Buffer.NumElements = numElements; - desc.Buffer.Flags = 0; - - ID3D11UnorderedAccessView *view = nullptr; - HRESULT result = device->CreateUnorderedAccessView(buffer, &desc, &view); - assert(SUCCEEDED(result)); - return view; + D3D12_UNORDERED_ACCESS_VIEW_DESC desc = { + .Format = format, + .ViewDimension = D3D12_UAV_DIMENSION_BUFFER, + .Buffer = { + .FirstElement = 0, + .NumElements = numElements }}; + + return UnorderedAccessView(buffer, desc); } 
-ID3D11ShaderResourceView* DirectXDevice::createSRV(ID3D11Resource *resource) +ShaderResourceView DirectXDevice::createSRV(ID3D12Resource *resource) { - ID3D11ShaderResourceView *view = nullptr; - HRESULT result = device->CreateShaderResourceView(resource, nullptr, &view); - assert(SUCCEEDED(result)); - return view; + return ShaderResourceView(resource, {}); } -ID3D11ShaderResourceView* DirectXDevice::createTypedSRV(ID3D11Resource *buffer, unsigned numElements, DXGI_FORMAT format) +ShaderResourceView DirectXDevice::createTypedSRV(ID3D12Resource *buffer, unsigned numElements, DXGI_FORMAT format) { - D3D11_SHADER_RESOURCE_VIEW_DESC desc; - desc.ViewDimension = D3D_SRV_DIMENSION_BUFFER; - desc.Format = format; - desc.Buffer.FirstElement = 0; - desc.Buffer.NumElements = numElements; - - ID3D11ShaderResourceView *view = nullptr; - HRESULT result = device->CreateShaderResourceView(buffer, &desc, &view); - assert(SUCCEEDED(result)); - return view; + D3D12_SHADER_RESOURCE_VIEW_DESC desc = { + .Format = format, + .ViewDimension = D3D12_SRV_DIMENSION_BUFFER, + .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + .Buffer = { + .FirstElement = 0, + .NumElements = numElements }}; + + return ShaderResourceView(buffer, desc); } -ID3D11ShaderResourceView* DirectXDevice::createStructuredSRV(ID3D11Resource* buffer, unsigned numElements, unsigned stride) +ShaderResourceView DirectXDevice::createStructuredSRV(ID3D12Resource* buffer, unsigned numElements, unsigned stride) { - D3D11_SHADER_RESOURCE_VIEW_DESC desc; - desc.ViewDimension = D3D_SRV_DIMENSION_BUFFER; - desc.Format = DXGI_FORMAT_UNKNOWN; - desc.Buffer.FirstElement = 0; - desc.Buffer.NumElements = numElements; - - ID3D11ShaderResourceView *view = nullptr; - HRESULT result = device->CreateShaderResourceView(buffer, &desc, &view); - assert(SUCCEEDED(result)); - return view; + D3D12_SHADER_RESOURCE_VIEW_DESC desc = { + .Format = DXGI_FORMAT_UNKNOWN, + .ViewDimension = D3D12_SRV_DIMENSION_BUFFER, + 
.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + .Buffer = + { + .FirstElement = 0, + .NumElements = numElements, + .StructureByteStride = stride }}; + + return ShaderResourceView(buffer, desc); } -ID3D11ShaderResourceView* DirectXDevice::createByteAddressSRV(ID3D11Resource *buffer, unsigned numElements) +ShaderResourceView DirectXDevice::createByteAddressSRV(ID3D12Resource *buffer, unsigned numElements) { - D3D11_SHADER_RESOURCE_VIEW_DESC desc; - desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX; - desc.Format = DXGI_FORMAT_R32_TYPELESS; - desc.BufferEx.FirstElement = 0; - desc.BufferEx.NumElements = numElements; - desc.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW; - - ID3D11ShaderResourceView *view = nullptr; - HRESULT result = device->CreateShaderResourceView(buffer, &desc, &view); - assert(SUCCEEDED(result)); - return view; + D3D12_SHADER_RESOURCE_VIEW_DESC desc = { + .Format = DXGI_FORMAT_R32_TYPELESS, + .ViewDimension = D3D12_SRV_DIMENSION_BUFFER, + .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + .Buffer = + { + .FirstElement = 0, + .NumElements = numElements, + .Flags = D3D12_BUFFER_SRV_FLAG_RAW }}; + + return ShaderResourceView(buffer, desc); } -ID3D11SamplerState* DirectXDevice::createSampler(SamplerType type) +SamplerState DirectXDevice::createSampler(SamplerType type) { - D3D11_SAMPLER_DESC desc; - ZeroMemory(&desc, sizeof(desc)); - desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; - desc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP; - desc.AddressV = D3D11_TEXTURE_ADDRESS_WRAP; - desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP; - desc.ComparisonFunc = D3D11_COMPARISON_NEVER; - desc.MaxLOD = D3D11_FLOAT32_MAX; + D3D12_SAMPLER_DESC desc = + { + .Filter = D3D12_FILTER_MIN_MAG_MIP_POINT, + .AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + .AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + .AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + .ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER, + .MaxLOD = D3D12_FLOAT32_MAX + }; switch (type) { case 
SamplerType::Nearest: - desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; + desc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; break; case SamplerType::Bilinear: - desc.Filter = D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT; + desc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; break; case SamplerType::Trilinear: - desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + desc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; break; } - ID3D11SamplerState *sampler = nullptr; - HRESULT result = device->CreateSamplerState(&desc, &sampler); - assert(SUCCEEDED(result)); - return sampler; + return SamplerState(desc); } -ID3D11ComputeShader* DirectXDevice::createComputeShader(const std::string& name, const std::vector &shaderBytes) +ComputePSO DirectXDevice::createComputeShader(const std::string& name, const std::vector &shaderBytes) { - ID3D11ComputeShader* shader = nullptr; - HRESULT result = device->CreateComputeShader(shaderBytes.data(), shaderBytes.size(), nullptr, &shader); - assert(SUCCEEDED(result)); - shader->SetPrivateData(WKPDID_D3DDebugObjectName, (UINT)name.length(), name.c_str()); - return shader; + return ComputePSO(device.Get(), name, shaderBytes); } -void DirectXDevice::dispatch(ID3D11ComputeShader *shader, uint3 resolution, uint3 groupSize, - std::initializer_list cbs, - std::initializer_list srvs, - std::initializer_list uavs, - std::initializer_list samplers) +void DirectXDevice::beginFrame() { - // Set resources - if(cbs.size()) - { - ID3D11Buffer* cbarray[D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT]; - int slot = 0; - for(auto cb : cbs) - cbarray[slot++] = cb; - deviceContext->CSSetConstantBuffers(0, static_cast(cbs.size()), cbarray); - } + cmdAllocator->Reset(); + cmdList->Reset(cmdAllocator.Get(), nullptr); - if(srvs.size()) - { - ID3D11ShaderResourceView* srvarray[D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT]; - int slot = 0; - for(auto srv : srvs) - srvarray[slot++] = srv; - deviceContext->CSSetShaderResources(0, static_cast(srvs.size()), srvarray); - } + D3D12_VIEWPORT 
viewport; + viewport.Height = (float)resolution.y; + viewport.Width = (float)resolution.x; + viewport.MaxDepth = 1.0f; + viewport.MinDepth = 0.0f; + viewport.TopLeftX = 0.0f; + viewport.TopLeftY = 0.0f; + cmdList->RSSetViewports(1, &viewport); - if(uavs.size()) - { - ID3D11UnorderedAccessView* uavarray[D3D11_1_UAV_SLOT_COUNT]; - int slot = 0; - for(auto uav : uavs) - uavarray[slot++] = uav; - deviceContext->CSSetUnorderedAccessViews(0, static_cast(uavs.size()), uavarray, nullptr); - } + ID3D12DescriptorHeap* heaps[] = { cbvSrvUavDescriptorHeap.Get(), samplerDescriptorHeap.Get() }; + cmdList->SetDescriptorHeaps(2, heaps); + cbvSrvUavDescriptorHeapOffset = 0; + samplerDescriptorHeapOffset = 0; - if(samplers.size()) - { - ID3D11SamplerState *samplerarray[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT]; - int slot = 0; - for(auto sampler : samplers) - samplerarray[slot++] = sampler; - deviceContext->CSSetSamplers(0, static_cast(samplers.size()), samplerarray); - } + frameFirstQuery = queryCounter; +} - // Render - uint3 groups = divRoundUp(resolution, groupSize); - deviceContext->CSSetShader(shader, nullptr, 0); - deviceContext->Dispatch(groups.x, groups.y, groups.z); +void DirectXDevice::dispatch( + const ComputePSO& shader, + uint3 resolution, + uint3 groupSize, + std::initializer_list cbs, + std::initializer_list srvs, + std::initializer_list uavs, + std::initializer_list samplers) +{ + D3D12_CPU_DESCRIPTOR_HANDLE descriptorTablesCpu[D3D12_MAX_ROOT_COST] = {}; + D3D12_GPU_DESCRIPTOR_HANDLE descriptorTablesGpu[D3D12_MAX_ROOT_COST] = {}; - // Remove resources - if(cbs.size()) + const ComputePSO::RootSignatureDesc& rootSigDesc = shader.getRootSignatureDesc(); + for (size_t rootParamIdx = 0; rootParamIdx < rootSigDesc.size(); rootParamIdx++) { - ID3D11Buffer* cbarray[D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT] = { 0 }; - deviceContext->CSSetConstantBuffers(0, static_cast(cbs.size()), cbarray); + const ComputePSO::RootParameter& rootParam = rootSigDesc[rootParamIdx]; + if 
(rootParam.type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) + { + uint32_t& heapOffset = rootParam.isSamplerDescriptorTable ? samplerDescriptorHeapOffset : cbvSrvUavDescriptorHeapOffset; + uint32_t tableOffset = heapOffset; + heapOffset += rootParam.numDescriptors; + ID3D12DescriptorHeap* heap = rootParam.isSamplerDescriptorTable ? samplerDescriptorHeap.Get() : cbvSrvUavDescriptorHeap.Get(); + uint32_t descriptorSize = device->GetDescriptorHandleIncrementSize( + rootParam.isSamplerDescriptorTable ? D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER : D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + descriptorTablesCpu[rootParamIdx] = { heap->GetCPUDescriptorHandleForHeapStart().ptr + descriptorSize * tableOffset }; + descriptorTablesGpu[rootParamIdx] = { heap->GetGPUDescriptorHandleForHeapStart().ptr + descriptorSize * tableOffset }; + } } - if(srvs.size()) - { - ID3D11ShaderResourceView* srvarray[D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT] = { 0 }; - deviceContext->CSSetShaderResources(0, static_cast(srvs.size()), srvarray); - } + cmdList->SetComputeRootSignature(shader.getRootSignature()); - if(uavs.size()) + auto bindResources = [this, shader, &descriptorTablesCpu](std::initializer_list resources) { - ID3D11UnorderedAccessView* uavarray[D3D11_1_UAV_SLOT_COUNT] = { 0 }; - deviceContext->CSSetUnorderedAccessViews(0, static_cast(uavs.size()), uavarray, nullptr); - } + ComputePSO::EBindingType bindingType = {}; + if constexpr (std::is_same_v) + bindingType = ComputePSO::EBindingType::kCbv; + else if constexpr (std::is_same_v) + bindingType = ComputePSO::EBindingType::kSrv; + else if constexpr (std::is_same_v) + bindingType = ComputePSO::EBindingType::kUav; + else if constexpr (std::is_same_v) + bindingType = ComputePSO::EBindingType::kSampler; + else + assert(false); + + for (size_t idx = 0; idx < resources.size(); idx++) + { + auto& resource = resources.begin()[idx]; + const ComputePSO::Binding* binding = shader.getBinding((uint32_t)idx, bindingType); + if (!binding) + continue; + 
+ if (binding->isRootDescriptor) + { + if constexpr (std::is_same_v) + cmdList->SetComputeRootConstantBufferView(binding->rootParamIdx, resource->GetGPUVirtualAddress()); + else if constexpr (std::is_same_v) + cmdList->SetComputeRootShaderResourceView(binding->rootParamIdx, resource->resource->GetGPUVirtualAddress()); + else if constexpr (std::is_same_v) + cmdList->SetComputeRootUnorderedAccessView(binding->rootParamIdx, resource->resource->GetGPUVirtualAddress()); + else + static_assert("Unknown type"); + } + else + { + uint32_t descriptorSize = device->GetDescriptorHandleIncrementSize( + bindingType == ComputePSO::EBindingType::kSampler ? D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER : D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE descriptorAddr = { descriptorTablesCpu[binding->rootParamIdx].ptr + binding->descriptorOffset * descriptorSize }; + + if constexpr (std::is_same_v) + { + D3D12_CONSTANT_BUFFER_VIEW_DESC desc = { + .BufferLocation = resource->GetGPUVirtualAddress(), + .SizeInBytes = (UINT)resource->GetDesc().Width }; + device->CreateConstantBufferView(&desc, descriptorAddr); + } + else if constexpr (std::is_same_v) + { + device->CreateShaderResourceView( + resource->resource, + resource->desc.has_value() ? &resource->desc.value() : nullptr, + descriptorAddr); + } + else if constexpr (std::is_same_v) + { + device->CreateUnorderedAccessView( + resource->resource, + nullptr, + resource->desc.has_value() ? 
&resource->desc.value() : nullptr, + descriptorAddr); + } + else if constexpr (std::is_same_v) + { + device->CreateSampler(&resource->samplerDesc, descriptorAddr); + } + else + static_assert("Unknown type"); + } + } + }; + + bindResources(cbs); + bindResources(srvs); + bindResources(uavs); + bindResources(samplers); - if(samplers.size()) + for (size_t rootParamIdx = 0; rootParamIdx < rootSigDesc.size(); rootParamIdx++) { - ID3D11SamplerState *samplerarray[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = { 0 }; - deviceContext->CSSetSamplers(0, static_cast(samplers.size()), samplerarray); + const ComputePSO::RootParameter& rootParam = rootSigDesc[rootParamIdx]; + if (rootParam.type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) + cmdList->SetComputeRootDescriptorTable((UINT)rootParamIdx, descriptorTablesGpu[rootParamIdx]); } -} -void DirectXDevice::clear(ID3D11RenderTargetView *rtv, const float4 &color) -{ - float clearColor[4] = { color.x, color.y, color.z, color.w }; - deviceContext->ClearRenderTargetView(rtv, clearColor); -} + cmdList->SetPipelineState(shader.getPso()); + uint3 groups = divRoundUp(resolution, groupSize); + cmdList->Dispatch(groups.x, groups.y, groups.z); -void DirectXDevice::clearDepth(ID3D11DepthStencilView *depthStencilView) -{ - deviceContext->ClearDepthStencilView(depthStencilView, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 0.0f, (UINT8)0); + D3D12_RESOURCE_BARRIER barrier = + { + .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV + }; + cmdList->ResourceBarrier(1, &barrier); } -void DirectXDevice::setRenderTargets(std::initializer_list rtvs, ID3D11DepthStencilView *depthStencilView) +void DirectXDevice::presentFrame() { - if (rtvs.size()) + uint32_t firstIdx = frameFirstQuery % queries.size(); + uint32_t remain = queryCounter - frameFirstQuery; + while (remain) { - ID3D11RenderTargetView* rtvarray[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT]; - int slot = 0; - for (auto rtv : rtvs) - rtvarray[slot++] = rtv; - 
deviceContext->OMSetRenderTargets(static_cast(rtvs.size()), rtvarray, depthStencilView); + uint32_t num = remain; + if (firstIdx + remain > queries.size()) + num = queries.size() - firstIdx; + + cmdList->ResolveQueryData( + queryHeap.Get(), + D3D12_QUERY_TYPE_TIMESTAMP, + firstIdx * 2, + num * 2, + queryResultBuffer.Get(), + firstIdx * sizeof(uint64_t) * 2); + + firstIdx = (firstIdx + num) % queries.size(); + remain -= num; } -} + cmdList->Close(); + + auto cmdListToSubmit = (ID3D12CommandList*)cmdList.Get(); + cmdQueue->ExecuteCommandLists(1, &cmdListToSubmit); -void DirectXDevice::presentFrame() -{ const bool vsync = false; swapChain->Present(vsync ? 1 : 0, 0); -} -void DirectXDevice::clearUAV(ID3D11UnorderedAccessView* uav, std::array color) -{ - deviceContext->ClearUnorderedAccessViewFloat(uav, color.data()); + cmdQueue->Signal(fence.Get(), ++fenceLastSignalVal); + HRESULT hr = fence->SetEventOnCompletion(fenceLastSignalVal, fenceEvent); + assert(SUCCEEDED(hr)); + WaitForSingleObject(fenceEvent, INFINITE); } QueryHandle DirectXDevice::startPerformanceQuery(unsigned id, const std::string& name) { - if (userDefinedAnnotation) - { - std::vector wname; - wname.resize(name.length() + 1); - size_t wnameLen = 0; - mbstowcs_s(&wnameLen, wname.data(), wname.size(), name.c_str(), name.length()); - userDefinedAnnotation->BeginEvent((const wchar_t*)wname.data()); - } - - PerformanceQuery& query = queries[queryCounter % queries.size()]; + PIXBeginEvent(cmdList.Get(), 0xffff00ff, name.c_str()); + + uint32_t queryIndex = queryCounter % queries.size(); + PerformanceQuery& query = queries[queryIndex]; query.id = id; query.name = name; - deviceContext->Begin(query.disjoint); - deviceContext->End(query.start); // NOTE: timestamp queries don't use Begin(), only End() - - QueryHandle out {queryCounter}; - queryCounter++; - return out; + + cmdList->EndQuery(queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, queryIndex * 2); + + return {queryCounter++}; } void 
DirectXDevice::endPerformanceQuery(QueryHandle queryHandle) { - PerformanceQuery& query = queries[queryHandle.queryIndex % queries.size()]; + cmdList->EndQuery( + queryHeap.Get(), + D3D12_QUERY_TYPE_TIMESTAMP, + (queryHandle.queryIndex % queries.size()) * 2 + 1); - deviceContext->End(query.end); // NOTE: timestamp queries don't use Begin(), only End() - deviceContext->End(query.disjoint); - - if (userDefinedAnnotation) - userDefinedAnnotation->EndEvent(); + PIXEndEvent(cmdList.Get()); } void DirectXDevice::processPerformanceResults(const std::function& functor) { - while(true) - { - PerformanceQuery& query = queries[queryProcessCounter % queries.size()]; - - D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint; - bool succDisjoint = deviceContext->GetData(query.disjoint, &disjoint, sizeof(disjoint), 0) == S_OK; - - UINT64 start = 0; - UINT64 end = 0; - bool succStart = deviceContext->GetData(query.start, &start, sizeof(start), 0) == S_OK; - bool succEnd = deviceContext->GetData(query.end, &end, sizeof(end), 0) == S_OK; + uint64_t* results = nullptr; + HRESULT hr = queryResultBuffer->Map(0, nullptr, (void**)&results); + assert(SUCCEEDED(hr)); - // Wait until all queries are ready - if (!succDisjoint || !succStart || !succEnd) - break; + uint64_t frequency; + hr = cmdQueue->GetTimestampFrequency(&frequency); + assert(SUCCEEDED(hr)); - if (!disjoint.Disjoint) - { - UINT64 d = end - start; - float delta = (float(d) / float(disjoint.Frequency)) * 1000.0f; + for (uint32_t idx = frameFirstQuery; idx < queryCounter; idx++) + { + uint32_t queryIdx = idx % queries.size(); + PerformanceQuery& query = queries[queryIdx]; + uint64_t start = results[queryIdx * 2]; + uint64_t end = results[queryIdx * 2 + 1]; - // Call functor to process results - functor(delta, query.id, query.name); - } + UINT64 d = end - start; + float delta = (float(d) / float(frequency)) * 1000.0f; - queryProcessCounter++; + // Call functor to process results + functor(delta, query.id, query.name); } + + 
queryResultBuffer->Unmap(0, nullptr); } diff --git a/perftest/directx.h b/perftest/directx.h index 0536702..bc7a843 100644 --- a/perftest/directx.h +++ b/perftest/directx.h @@ -1,12 +1,19 @@ #pragma once #include "datatypes.h" -#include "com_ptr.h" #include -#include +#include +#include #include #include #include +#include #include +#include +#include +#include + +template +using ComPtr = Microsoft::WRL::ComPtr; struct QueryHandle { @@ -15,27 +22,93 @@ struct QueryHandle struct PerformanceQuery { - com_ptr disjoint; - com_ptr start; - com_ptr end; - unsigned id; std::string name; }; -std::vector> enumerateAdapters(); +class SamplerState +{ +public: + SamplerState() = default; + SamplerState(const D3D12_SAMPLER_DESC& samplerDesc) + : samplerDesc(samplerDesc) + {} -class DirectXDevice + const D3D12_SAMPLER_DESC samplerDesc = {}; +}; + +class ShaderResourceView +{ +public: + ShaderResourceView() = default; + ShaderResourceView(ID3D12Resource* resource, std::optional desc) + : resource(resource), desc(desc) + {} + + ID3D12Resource* const resource = {}; + const std::optional desc = {}; +}; + +class UnorderedAccessView { public: + UnorderedAccessView() = default; + UnorderedAccessView(ID3D12Resource* resource, std::optional desc) + : resource(resource), desc(desc) + {} + + ID3D12Resource* const resource = {}; + const std::optional desc = {}; +}; - enum class BufferType +class ComputePSO +{ +public: + struct Binding { - Default, - Structured, - ByteAddress + uint32_t rootParamIdx; + bool isRootDescriptor; + uint32_t descriptorOffset; }; + enum class EBindingType + { + kCbv = 0, + kSrv, + kUav, + kSampler, + kCount + }; + + struct RootParameter + { + D3D12_ROOT_PARAMETER_TYPE type; + uint32_t numDescriptors; + bool isSamplerDescriptorTable; + }; + + using RootSignatureDesc = std::vector; + + ComputePSO() = delete; + ComputePSO(ID3D12Device* device, const std::string& name, const std::vector& shaderBytes); + + ID3D12PipelineState* getPso() const { return pso.Get(); } + 
ID3D12RootSignature* getRootSignature() const { return rootSig.Get(); } + const RootSignatureDesc& getRootSignatureDesc() const { return rootSignatureDesc; } + const Binding* getBinding(uint32_t slot, EBindingType type) const; + +private: + ComPtr pso; + ComPtr rootSig; + RootSignatureDesc rootSignatureDesc; + std::unordered_map bindings[(int)EBindingType::kCount]; +}; + +std::vector> enumerateAdapters(); + +class DirectXDevice +{ +public: enum class SamplerType { Nearest, @@ -44,49 +117,47 @@ class DirectXDevice }; DirectXDevice(HWND window, uint2 resolution, IDXGIAdapter* adapter = nullptr); + ~DirectXDevice(); // Create resources - ID3D11UnorderedAccessView* createBackBufferUAV(); - ID3D11DepthStencilView* createDepthStencilView(uint2 size); - ID3D11RenderTargetView* DirectXDevice::createBackBufferRTV(); - ID3D11ComputeShader* createComputeShader(const std::string& name, const std::vector& shaderBytes); - - ID3D11Buffer* createConstantBuffer(unsigned bytes); - ID3D11Buffer* createBuffer(unsigned numElements, unsigned strideBytes, BufferType type = BufferType::Default); - ID3D11Texture2D* createTexture2d(uint2 dimensions, DXGI_FORMAT format, unsigned mips); - ID3D11Texture3D* createTexture3d(uint3 dimensions, DXGI_FORMAT format, unsigned mips); - ID3D11SamplerState* createSampler(SamplerType type); - - ID3D11UnorderedAccessView* createUAV(ID3D11Resource* resource); - ID3D11UnorderedAccessView* createTypedUAV(ID3D11Resource* buffer, unsigned numElements, DXGI_FORMAT format); - ID3D11UnorderedAccessView* createByteAddressUAV(ID3D11Resource* buffer, unsigned numElements); - - ID3D11ShaderResourceView* createSRV(ID3D11Resource* buffer); - ID3D11ShaderResourceView* createTypedSRV(ID3D11Resource* buffer, unsigned numElements, DXGI_FORMAT format); - ID3D11ShaderResourceView* createStructuredSRV(ID3D11Resource* buffer, unsigned numElements, unsigned stride); - ID3D11ShaderResourceView* createByteAddressSRV(ID3D11Resource* buffer, unsigned numElements); + ComputePSO 
createComputeShader(const std::string& name, const std::vector& shaderBytes); + + ComPtr createConstantBuffer(unsigned bytes); + ComPtr createBuffer(unsigned numElements, unsigned strideBytes); + ComPtr createTexture2d(uint2 dimensions, DXGI_FORMAT format, unsigned mips); + ComPtr createTexture3d(uint3 dimensions, DXGI_FORMAT format, unsigned mips); + SamplerState createSampler(SamplerType type); + + UnorderedAccessView createUAV(ID3D12Resource* resource); + UnorderedAccessView createTypedUAV(ID3D12Resource* buffer, unsigned numElements, DXGI_FORMAT format); + UnorderedAccessView createByteAddressUAV(ID3D12Resource* buffer, unsigned numElements); + + ShaderResourceView createSRV(ID3D12Resource* resource); + ShaderResourceView createTypedSRV(ID3D12Resource* buffer, unsigned numElements, DXGI_FORMAT format); + ShaderResourceView createStructuredSRV(ID3D12Resource* buffer, unsigned numElements, unsigned stride); + ShaderResourceView createByteAddressSRV(ID3D12Resource* buffer, unsigned numElements); // Data update template - void updateConstantBuffer(ID3D11Buffer* cbuffer, const T& cb) + void updateConstantBuffer(ID3D12Resource* cbuffer, const T& cb) { - D3D11_MAPPED_SUBRESOURCE map; - deviceContext->Map(cbuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); - memcpy(map.pData, &cb, sizeof(cb)); - deviceContext->Unmap(cbuffer, 0); + void* ptr = nullptr; + cbuffer->Map(0, nullptr, &ptr); + memcpy(ptr, &cb, sizeof(cb)); + cbuffer->Unmap(0, nullptr); } // Commands - void clear(ID3D11RenderTargetView* rtv, const float4& color); - void clearDepth(ID3D11DepthStencilView *depthStencilView); - void setRenderTargets(std::initializer_list rtvs, ID3D11DepthStencilView* depthStencilView); - void dispatch(ID3D11ComputeShader* shader, uint3 resolution, uint3 groupSize, - std::initializer_list cbs, - std::initializer_list srvs, - std::initializer_list uavs = {}, - std::initializer_list samplers = {}); + void beginFrame(); + void dispatch( + const ComputePSO& shader, + uint3 resolution, + 
uint3 groupSize, + std::initializer_list cbs, + std::initializer_list srvs, + std::initializer_list uavs = {}, + std::initializer_list samplers = {}); void presentFrame(); - void clearUAV(ID3D11UnorderedAccessView* uav, std::array color); // Performance querys QueryHandle startPerformanceQuery(unsigned id, const std::string& name); @@ -96,8 +167,8 @@ class DirectXDevice // Device and window HWND getWindowHandle() { return windowHandle; } uint2 getResolution() { return resolution; } - ID3D11Device* getDevice() { return device; } - ID3D11DeviceContext* getDeviceContext() { return deviceContext; } + ID3D12Device* getDevice() { return device.Get(); } + ID3D12GraphicsCommandList* getCmdList() { return cmdList.Get(); } private: @@ -106,13 +177,24 @@ class DirectXDevice uint2 resolution; // DirectX - com_ptr swapChain; - com_ptr device; - com_ptr deviceContext; - com_ptr userDefinedAnnotation; + ComPtr debugInterface; + ComPtr swapChain; + ComPtr device; + ComPtr cmdQueue; + ComPtr fence; + HANDLE fenceEvent; + UINT64 fenceLastSignalVal; + ComPtr cmdList; + ComPtr cmdAllocator; + ComPtr queryHeap; + ComPtr queryResultBuffer; + ComPtr cbvSrvUavDescriptorHeap; + ComPtr samplerDescriptorHeap; + uint32_t cbvSrvUavDescriptorHeapOffset = 0; + uint32_t samplerDescriptorHeapOffset = 0; // Queries std::array queries; unsigned queryCounter = 0; - unsigned queryProcessCounter = 0; + unsigned frameFirstQuery = 0; }; diff --git a/perftest/graphicsUtil.h b/perftest/graphicsUtil.h index 470dbba..b426dd9 100644 --- a/perftest/graphicsUtil.h +++ b/perftest/graphicsUtil.h @@ -2,7 +2,7 @@ #include "directx.h" #include "file.h" -inline ID3D11ComputeShader *loadComputeShader(DirectXDevice &dx, const std::string &filename) +inline ComputePSO loadComputeShader(DirectXDevice &dx, const std::string &filename) { auto shaderBlob = loadFile(filename); return dx.createComputeShader(filename, shaderBlob); diff --git a/perftest/loadConstantBody.hlsli b/perftest/loadConstantBody.hlsli index 
0233433..96d56fd 100644 --- a/perftest/loadConstantBody.hlsli +++ b/perftest/loadConstantBody.hlsli @@ -8,10 +8,16 @@ cbuffer CB0 : register(b0) LoadConstantsWithArray loadConstants; }; +#define ROOT_SIGNATURE \ + "DescriptorTable(" \ + "CBV(b0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE),"\ + "UAV(u0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE))" + #define THREAD_GROUP_SIZE 256 groupshared float dummyLDS[THREAD_GROUP_SIZE]; +[RootSignature(ROOT_SIGNATURE)] [numthreads(THREAD_GROUP_SIZE, 1, 1)] void main(uint3 tid : SV_DispatchThreadID, uint gix : SV_GroupIndex) { diff --git a/perftest/loadRawBody.hlsli b/perftest/loadRawBody.hlsli index b3a18af..231ea01 100644 --- a/perftest/loadRawBody.hlsli +++ b/perftest/loadRawBody.hlsli @@ -9,10 +9,17 @@ cbuffer CB0 : register(b0) LoadConstants loadConstants; }; +#define ROOT_SIGNATURE \ + "DescriptorTable(" \ + "CBV(b0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE),"\ + "SRV(t0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE),"\ + "UAV(u0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE))" + #define THREAD_GROUP_SIZE 256 groupshared float dummyLDS[THREAD_GROUP_SIZE]; +[RootSignature(ROOT_SIGNATURE)] [numthreads(THREAD_GROUP_SIZE, 1, 1)] void main(uint3 tid : SV_DispatchThreadID, uint gix : SV_GroupIndex) { diff --git a/perftest/loadStructuredBody.hlsli b/perftest/loadStructuredBody.hlsli index 068f79a..91cfe9d 100644 --- a/perftest/loadStructuredBody.hlsli +++ b/perftest/loadStructuredBody.hlsli @@ -8,10 +8,17 @@ cbuffer CB0 : register(b0) LoadConstants loadConstants; }; +#define ROOT_SIGNATURE \ + "DescriptorTable(" \ + "CBV(b0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE),"\ + "SRV(t0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE),"\ + "UAV(u0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE))" + #define THREAD_GROUP_SIZE 256 groupshared float dummyLDS[THREAD_GROUP_SIZE]; +[RootSignature(ROOT_SIGNATURE)] [numthreads(THREAD_GROUP_SIZE, 1, 1)] void main(uint3 tid : SV_DispatchThreadID, uint gix : SV_GroupIndex) { diff --git 
a/perftest/loadTexBody.hlsli b/perftest/loadTexBody.hlsli index 8b4087d..6eb2909 100644 --- a/perftest/loadTexBody.hlsli +++ b/perftest/loadTexBody.hlsli @@ -8,10 +8,17 @@ cbuffer CB0 : register(b0) LoadConstants loadConstants; }; +#define ROOT_SIGNATURE \ + "DescriptorTable(" \ + "CBV(b0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE),"\ + "SRV(t0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE),"\ + "UAV(u0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE))" + #define THREAD_GROUP_DIM 16 groupshared float dummyLDS[THREAD_GROUP_DIM][THREAD_GROUP_DIM]; +[RootSignature(ROOT_SIGNATURE)] [numthreads(THREAD_GROUP_DIM, THREAD_GROUP_DIM, 1)] void main(uint3 tid : SV_DispatchThreadID, uint3 gid : SV_GroupThreadID) { diff --git a/perftest/loadTypedBody.hlsli b/perftest/loadTypedBody.hlsli index 068f79a..91cfe9d 100644 --- a/perftest/loadTypedBody.hlsli +++ b/perftest/loadTypedBody.hlsli @@ -8,10 +8,17 @@ cbuffer CB0 : register(b0) LoadConstants loadConstants; }; +#define ROOT_SIGNATURE \ + "DescriptorTable(" \ + "CBV(b0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE),"\ + "SRV(t0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE),"\ + "UAV(u0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE))" + #define THREAD_GROUP_SIZE 256 groupshared float dummyLDS[THREAD_GROUP_SIZE]; +[RootSignature(ROOT_SIGNATURE)] [numthreads(THREAD_GROUP_SIZE, 1, 1)] void main(uint3 tid : SV_DispatchThreadID, uint gix : SV_GroupIndex) { diff --git a/perftest/main.cpp b/perftest/main.cpp index 1f9dd32..9918482 100644 --- a/perftest/main.cpp +++ b/perftest/main.cpp @@ -8,29 +8,29 @@ class BenchTest { public: - BenchTest(DirectXDevice& dx, ID3D11UnorderedAccessView* output) : dx(dx), output(output), testCaseNumber(0) + BenchTest(DirectXDevice& dx, const UnorderedAccessView& output) : dx(dx), output(output), testCaseNumber(0) { } - void testCase(ID3D11ComputeShader* shader, ID3D11Buffer* cb, ID3D11ShaderResourceView* source, const std::string& name) + void testCase(ComputePSO& shader, ID3D12Resource* cb, const 
ShaderResourceView& source, const std::string& name) { const uint3 workloadThreadCount(1024, 1024, 1); const uint3 workloadGroupSize(256, 1, 1); QueryHandle query = dx.startPerformanceQuery(testCaseNumber, name); - dx.dispatch(shader, workloadThreadCount, workloadGroupSize, { cb }, { source }, { output }, {}); + dx.dispatch(shader, workloadThreadCount, workloadGroupSize, { cb }, { &source }, { &output }, {}); dx.endPerformanceQuery(query); testCaseNumber++; } - void testCaseWithSampler(ID3D11ComputeShader* shader, ID3D11Buffer* cb, ID3D11ShaderResourceView* source, ID3D11SamplerState* sampler, const std::string& name) + void testCaseWithSampler(ComputePSO& shader, ID3D12Resource* cb, const ShaderResourceView& source, const SamplerState& sampler, const std::string& name) { const uint3 workloadThreadCount(1024, 1024, 1); const uint3 workloadGroupSize(256, 1, 1); QueryHandle query = dx.startPerformanceQuery(testCaseNumber, name); - dx.dispatch(shader, workloadThreadCount, workloadGroupSize, { cb }, { source }, { output }, { sampler }); + dx.dispatch(shader, workloadThreadCount, workloadGroupSize, { cb }, { &source }, { &output }, { &sampler }); dx.endPerformanceQuery(query); testCaseNumber++; @@ -38,7 +38,7 @@ class BenchTest private: DirectXDevice& dx; - ID3D11UnorderedAccessView* output; + const UnorderedAccessView& output; unsigned testCaseNumber; }; @@ -46,7 +46,7 @@ class BenchTest int main(int argc, char *argv[]) { // Enumerate adapters - std::vector> adapters = enumerateAdapters(); + std::vector> adapters = enumerateAdapters(); printf("PerfTest\nTo select adapter, use: PerfTest.exe [ADAPTER_INDEX]\n\n"); printf("Adapters found:\n"); int index = 0; @@ -69,135 +69,136 @@ int main(int argc, char *argv[]) // Init systems uint2 resolution(256, 256); HWND window = createWindow(resolution); - DirectXDevice dx(window, resolution, adapters[selectedAdapterIdx]); + DirectXDevice dx(window, resolution, adapters[selectedAdapterIdx].Get()); // Load shaders - com_ptr 
shaderLoadTyped1dInvariant = loadComputeShader(dx, "shaders/loadTyped1dInvariant.cso"); - com_ptr shaderLoadTyped1dLinear = loadComputeShader(dx, "shaders/loadTyped1dLinear.cso"); - com_ptr shaderLoadTyped1dRandom = loadComputeShader(dx, "shaders/loadTyped1dRandom.cso"); - com_ptr shaderLoadTyped2dInvariant = loadComputeShader(dx, "shaders/loadTyped2dInvariant.cso"); - com_ptr shaderLoadTyped2dLinear = loadComputeShader(dx, "shaders/loadTyped2dLinear.cso"); - com_ptr shaderLoadTyped2dRandom = loadComputeShader(dx, "shaders/loadTyped2dRandom.cso"); - com_ptr shaderLoadTyped4dInvariant = loadComputeShader(dx, "shaders/loadTyped4dInvariant.cso"); - com_ptr shaderLoadTyped4dLinear = loadComputeShader(dx, "shaders/loadTyped4dLinear.cso"); - com_ptr shaderLoadTyped4dRandom = loadComputeShader(dx, "shaders/loadTyped4dRandom.cso"); - - com_ptr shaderLoadRaw1dInvariant = loadComputeShader(dx, "shaders/loadRaw1dInvariant.cso"); - com_ptr shaderLoadRaw1dLinear = loadComputeShader(dx, "shaders/loadRaw1dLinear.cso"); - com_ptr shaderLoadRaw1dRandom = loadComputeShader(dx, "shaders/loadRaw1dRandom.cso"); - com_ptr shaderLoadRaw2dInvariant = loadComputeShader(dx, "shaders/loadRaw2dInvariant.cso"); - com_ptr shaderLoadRaw2dLinear = loadComputeShader(dx, "shaders/loadRaw2dLinear.cso"); - com_ptr shaderLoadRaw2dRandom = loadComputeShader(dx, "shaders/loadRaw2dRandom.cso"); - com_ptr shaderLoadRaw3dInvariant = loadComputeShader(dx, "shaders/loadRaw3dInvariant.cso"); - com_ptr shaderLoadRaw3dLinear = loadComputeShader(dx, "shaders/loadRaw3dLinear.cso"); - com_ptr shaderLoadRaw3dRandom = loadComputeShader(dx, "shaders/loadRaw3dRandom.cso"); - com_ptr shaderLoadRaw4dInvariant = loadComputeShader(dx, "shaders/loadRaw4dInvariant.cso"); - com_ptr shaderLoadRaw4dLinear = loadComputeShader(dx, "shaders/loadRaw4dLinear.cso"); - com_ptr shaderLoadRaw4dRandom = loadComputeShader(dx, "shaders/loadRaw4dRandom.cso"); - - com_ptr shaderLoadTex1dInvariant = loadComputeShader(dx, 
"shaders/loadTex1dInvariant.cso"); - com_ptr shaderLoadTex1dLinear = loadComputeShader(dx, "shaders/loadTex1dLinear.cso"); - com_ptr shaderLoadTex1dRandom = loadComputeShader(dx, "shaders/loadTex1dRandom.cso"); - com_ptr shaderLoadTex2dInvariant = loadComputeShader(dx, "shaders/loadTex2dInvariant.cso"); - com_ptr shaderLoadTex2dLinear = loadComputeShader(dx, "shaders/loadTex2dLinear.cso"); - com_ptr shaderLoadTex2dRandom = loadComputeShader(dx, "shaders/loadTex2dRandom.cso"); - com_ptr shaderLoadTex4dInvariant = loadComputeShader(dx, "shaders/loadTex4dInvariant.cso"); - com_ptr shaderLoadTex4dLinear = loadComputeShader(dx, "shaders/loadTex4dLinear.cso"); - com_ptr shaderLoadTex4dRandom = loadComputeShader(dx, "shaders/loadTex4dRandom.cso"); - - com_ptr shaderSampleTex1dInvariant = loadComputeShader(dx, "shaders/sampleTex1dInvariant.cso"); - com_ptr shaderSampleTex1dLinear = loadComputeShader(dx, "shaders/sampleTex1dLinear.cso"); - com_ptr shaderSampleTex1dRandom = loadComputeShader(dx, "shaders/sampleTex1dRandom.cso"); - com_ptr shaderSampleTex2dInvariant = loadComputeShader(dx, "shaders/sampleTex2dInvariant.cso"); - com_ptr shaderSampleTex2dLinear = loadComputeShader(dx, "shaders/sampleTex2dLinear.cso"); - com_ptr shaderSampleTex2dRandom = loadComputeShader(dx, "shaders/sampleTex2dRandom.cso"); - com_ptr shaderSampleTex4dInvariant = loadComputeShader(dx, "shaders/sampleTex4dInvariant.cso"); - com_ptr shaderSampleTex4dLinear = loadComputeShader(dx, "shaders/sampleTex4dLinear.cso"); - com_ptr shaderSampleTex4dRandom = loadComputeShader(dx, "shaders/sampleTex4dRandom.cso"); - - com_ptr shaderLoadConstant4dInvariant = loadComputeShader(dx, "shaders/loadConstant4dInvariant.cso"); - com_ptr shaderLoadConstant4dLinear = loadComputeShader(dx, "shaders/loadConstant4dLinear.cso"); - com_ptr shaderLoadConstant4dRandom = loadComputeShader(dx, "shaders/loadConstant4dRandom.cso"); - - com_ptr shaderLoadStructured1dInvariant = loadComputeShader(dx, 
"shaders/loadStructured1dInvariant.cso"); - com_ptr shaderLoadStructured1dLinear = loadComputeShader(dx, "shaders/loadStructured1dLinear.cso"); - com_ptr shaderLoadStructured1dRandom = loadComputeShader(dx, "shaders/loadStructured1dRandom.cso"); - com_ptr shaderLoadStructured2dInvariant = loadComputeShader(dx, "shaders/loadStructured2dInvariant.cso"); - com_ptr shaderLoadStructured2dLinear = loadComputeShader(dx, "shaders/loadStructured2dLinear.cso"); - com_ptr shaderLoadStructured2dRandom = loadComputeShader(dx, "shaders/loadStructured2dRandom.cso"); - com_ptr shaderLoadStructured4dInvariant = loadComputeShader(dx, "shaders/loadStructured4dInvariant.cso"); - com_ptr shaderLoadStructured4dLinear = loadComputeShader(dx, "shaders/loadStructured4dLinear.cso"); - com_ptr shaderLoadStructured4dRandom = loadComputeShader(dx, "shaders/loadStructured4dRandom.cso"); - + printf("Loading shaders..."); + ComputePSO shaderLoadTyped1dInvariant = loadComputeShader(dx, "shaders/loadTyped1dInvariant.cso"); + ComputePSO shaderLoadTyped1dLinear = loadComputeShader(dx, "shaders/loadTyped1dLinear.cso"); + ComputePSO shaderLoadTyped1dRandom = loadComputeShader(dx, "shaders/loadTyped1dRandom.cso"); + ComputePSO shaderLoadTyped2dInvariant = loadComputeShader(dx, "shaders/loadTyped2dInvariant.cso"); + ComputePSO shaderLoadTyped2dLinear = loadComputeShader(dx, "shaders/loadTyped2dLinear.cso"); + ComputePSO shaderLoadTyped2dRandom = loadComputeShader(dx, "shaders/loadTyped2dRandom.cso"); + ComputePSO shaderLoadTyped4dInvariant = loadComputeShader(dx, "shaders/loadTyped4dInvariant.cso"); + ComputePSO shaderLoadTyped4dLinear = loadComputeShader(dx, "shaders/loadTyped4dLinear.cso"); + ComputePSO shaderLoadTyped4dRandom = loadComputeShader(dx, "shaders/loadTyped4dRandom.cso"); + + ComputePSO shaderLoadRaw1dInvariant = loadComputeShader(dx, "shaders/loadRaw1dInvariant.cso"); + ComputePSO shaderLoadRaw1dLinear = loadComputeShader(dx, "shaders/loadRaw1dLinear.cso"); + ComputePSO 
shaderLoadRaw1dRandom = loadComputeShader(dx, "shaders/loadRaw1dRandom.cso"); + ComputePSO shaderLoadRaw2dInvariant = loadComputeShader(dx, "shaders/loadRaw2dInvariant.cso"); + ComputePSO shaderLoadRaw2dLinear = loadComputeShader(dx, "shaders/loadRaw2dLinear.cso"); + ComputePSO shaderLoadRaw2dRandom = loadComputeShader(dx, "shaders/loadRaw2dRandom.cso"); + ComputePSO shaderLoadRaw3dInvariant = loadComputeShader(dx, "shaders/loadRaw3dInvariant.cso"); + ComputePSO shaderLoadRaw3dLinear = loadComputeShader(dx, "shaders/loadRaw3dLinear.cso"); + ComputePSO shaderLoadRaw3dRandom = loadComputeShader(dx, "shaders/loadRaw3dRandom.cso"); + ComputePSO shaderLoadRaw4dInvariant = loadComputeShader(dx, "shaders/loadRaw4dInvariant.cso"); + ComputePSO shaderLoadRaw4dLinear = loadComputeShader(dx, "shaders/loadRaw4dLinear.cso"); + ComputePSO shaderLoadRaw4dRandom = loadComputeShader(dx, "shaders/loadRaw4dRandom.cso"); + + ComputePSO shaderLoadTex1dInvariant = loadComputeShader(dx, "shaders/loadTex1dInvariant.cso"); + ComputePSO shaderLoadTex1dLinear = loadComputeShader(dx, "shaders/loadTex1dLinear.cso"); + ComputePSO shaderLoadTex1dRandom = loadComputeShader(dx, "shaders/loadTex1dRandom.cso"); + ComputePSO shaderLoadTex2dInvariant = loadComputeShader(dx, "shaders/loadTex2dInvariant.cso"); + ComputePSO shaderLoadTex2dLinear = loadComputeShader(dx, "shaders/loadTex2dLinear.cso"); + ComputePSO shaderLoadTex2dRandom = loadComputeShader(dx, "shaders/loadTex2dRandom.cso"); + ComputePSO shaderLoadTex4dInvariant = loadComputeShader(dx, "shaders/loadTex4dInvariant.cso"); + ComputePSO shaderLoadTex4dLinear = loadComputeShader(dx, "shaders/loadTex4dLinear.cso"); + ComputePSO shaderLoadTex4dRandom = loadComputeShader(dx, "shaders/loadTex4dRandom.cso"); + + ComputePSO shaderSampleTex1dInvariant = loadComputeShader(dx, "shaders/sampleTex1dInvariant.cso"); + ComputePSO shaderSampleTex1dLinear = loadComputeShader(dx, "shaders/sampleTex1dLinear.cso"); + ComputePSO shaderSampleTex1dRandom = 
loadComputeShader(dx, "shaders/sampleTex1dRandom.cso"); + ComputePSO shaderSampleTex2dInvariant = loadComputeShader(dx, "shaders/sampleTex2dInvariant.cso"); + ComputePSO shaderSampleTex2dLinear = loadComputeShader(dx, "shaders/sampleTex2dLinear.cso"); + ComputePSO shaderSampleTex2dRandom = loadComputeShader(dx, "shaders/sampleTex2dRandom.cso"); + ComputePSO shaderSampleTex4dInvariant = loadComputeShader(dx, "shaders/sampleTex4dInvariant.cso"); + ComputePSO shaderSampleTex4dLinear = loadComputeShader(dx, "shaders/sampleTex4dLinear.cso"); + ComputePSO shaderSampleTex4dRandom = loadComputeShader(dx, "shaders/sampleTex4dRandom.cso"); + + ComputePSO shaderLoadConstant4dInvariant = loadComputeShader(dx, "shaders/loadConstant4dInvariant.cso"); + ComputePSO shaderLoadConstant4dLinear = loadComputeShader(dx, "shaders/loadConstant4dLinear.cso"); + ComputePSO shaderLoadConstant4dRandom = loadComputeShader(dx, "shaders/loadConstant4dRandom.cso"); + + ComputePSO shaderLoadStructured1dInvariant = loadComputeShader(dx, "shaders/loadStructured1dInvariant.cso"); + ComputePSO shaderLoadStructured1dLinear = loadComputeShader(dx, "shaders/loadStructured1dLinear.cso"); + ComputePSO shaderLoadStructured1dRandom = loadComputeShader(dx, "shaders/loadStructured1dRandom.cso"); + ComputePSO shaderLoadStructured2dInvariant = loadComputeShader(dx, "shaders/loadStructured2dInvariant.cso"); + ComputePSO shaderLoadStructured2dLinear = loadComputeShader(dx, "shaders/loadStructured2dLinear.cso"); + ComputePSO shaderLoadStructured2dRandom = loadComputeShader(dx, "shaders/loadStructured2dRandom.cso"); + ComputePSO shaderLoadStructured4dInvariant = loadComputeShader(dx, "shaders/loadStructured4dInvariant.cso"); + ComputePSO shaderLoadStructured4dLinear = loadComputeShader(dx, "shaders/loadStructured4dLinear.cso"); + ComputePSO shaderLoadStructured4dRandom = loadComputeShader(dx, "shaders/loadStructured4dRandom.cso"); + printf(" Done\n"); // Create buffers and output UAV - com_ptr bufferOutput = 
dx.createBuffer(2048, 4, DirectXDevice::BufferType::ByteAddress); - com_ptr bufferInput = dx.createBuffer(1024, 16, DirectXDevice::BufferType::ByteAddress); - com_ptr bufferInputStructured4 = dx.createBuffer(1024, 4, DirectXDevice::BufferType::Structured); - com_ptr bufferInputStructured8 = dx.createBuffer(1024, 8, DirectXDevice::BufferType::Structured); - com_ptr bufferInputStructured16 = dx.createBuffer(1024, 16, DirectXDevice::BufferType::Structured); - com_ptr outputUAV = dx.createTypedUAV(bufferOutput, 2048, DXGI_FORMAT_R32_FLOAT); + ComPtr bufferOutput = dx.createBuffer(2048, 4); + ComPtr bufferInput = dx.createBuffer(1024, 16); + ComPtr bufferInputStructured4 = dx.createBuffer(1024, 4); + ComPtr bufferInputStructured8 = dx.createBuffer(1024, 8); + ComPtr bufferInputStructured16 = dx.createBuffer(1024, 16); + UnorderedAccessView outputUAV = dx.createTypedUAV(bufferOutput.Get(), 2048, DXGI_FORMAT_R32_FLOAT); // SRVs for benchmarking different buffer view formats/types - com_ptr typedSRV_R8 = dx.createTypedSRV(bufferInput, 1024, DXGI_FORMAT_R8_UNORM); - com_ptr typedSRV_R16F = dx.createTypedSRV(bufferInput, 1024, DXGI_FORMAT_R16_FLOAT); - com_ptr typedSRV_R32F = dx.createTypedSRV(bufferInput, 1024, DXGI_FORMAT_R32_FLOAT); - com_ptr typedSRV_RG8 = dx.createTypedSRV(bufferInput, 1024, DXGI_FORMAT_R8G8_UNORM); - com_ptr typedSRV_RG16F = dx.createTypedSRV(bufferInput, 1024, DXGI_FORMAT_R16G16_FLOAT); - com_ptr typedSRV_RG32F = dx.createTypedSRV(bufferInput, 1024, DXGI_FORMAT_R32G32_FLOAT); - com_ptr typedSRV_RGBA8 = dx.createTypedSRV(bufferInput, 1024, DXGI_FORMAT_R8G8B8A8_UNORM); - com_ptr typedSRV_RGBA16F = dx.createTypedSRV(bufferInput, 1024, DXGI_FORMAT_R16G16B16A16_FLOAT); - com_ptr typedSRV_RGBA32F = dx.createTypedSRV(bufferInput, 1024, DXGI_FORMAT_R32G32B32A32_FLOAT); - com_ptr structuredSRV_R32F = dx.createStructuredSRV(bufferInputStructured4, 1024, 4); - com_ptr structuredSRV_RG32F = dx.createStructuredSRV(bufferInputStructured8, 1024, 8); - com_ptr 
structuredSRV_RGBA32F = dx.createStructuredSRV(bufferInputStructured16, 1024, 16); - com_ptr byteAddressSRV = dx.createByteAddressSRV(bufferInput, 1024); + ShaderResourceView typedSRV_R8 = dx.createTypedSRV(bufferInput.Get(), 1024, DXGI_FORMAT_R8_UNORM); + ShaderResourceView typedSRV_R16F = dx.createTypedSRV(bufferInput.Get(), 1024, DXGI_FORMAT_R16_FLOAT); + ShaderResourceView typedSRV_R32F = dx.createTypedSRV(bufferInput.Get(), 1024, DXGI_FORMAT_R32_FLOAT); + ShaderResourceView typedSRV_RG8 = dx.createTypedSRV(bufferInput.Get(), 1024, DXGI_FORMAT_R8G8_UNORM); + ShaderResourceView typedSRV_RG16F = dx.createTypedSRV(bufferInput.Get(), 1024, DXGI_FORMAT_R16G16_FLOAT); + ShaderResourceView typedSRV_RG32F = dx.createTypedSRV(bufferInput.Get(), 1024, DXGI_FORMAT_R32G32_FLOAT); + ShaderResourceView typedSRV_RGBA8 = dx.createTypedSRV(bufferInput.Get(), 1024, DXGI_FORMAT_R8G8B8A8_UNORM); + ShaderResourceView typedSRV_RGBA16F = dx.createTypedSRV(bufferInput.Get(), 1024, DXGI_FORMAT_R16G16B16A16_FLOAT); + ShaderResourceView typedSRV_RGBA32F = dx.createTypedSRV(bufferInput.Get(), 1024, DXGI_FORMAT_R32G32B32A32_FLOAT); + ShaderResourceView structuredSRV_R32F = dx.createStructuredSRV(bufferInputStructured4.Get(), 1024, 4); + ShaderResourceView structuredSRV_RG32F = dx.createStructuredSRV(bufferInputStructured8.Get(), 1024, 8); + ShaderResourceView structuredSRV_RGBA32F = dx.createStructuredSRV(bufferInputStructured16.Get(), 1024, 16); + ShaderResourceView byteAddressSRV = dx.createByteAddressSRV(bufferInput.Get(), 1024); // Create input textures - com_ptr texR8 = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R8_UNORM, 1); - com_ptr texR16F = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R16_FLOAT, 1); - com_ptr texR32F = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R32_FLOAT, 1); - com_ptr texRG8 = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R8G8_UNORM, 1); - com_ptr texRG16F = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R16G16_FLOAT, 1); - com_ptr texRG32F = 
dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R32G32_FLOAT, 1); - com_ptr texRGBA8 = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R8G8B8A8_UNORM, 1); - com_ptr texRGBA16F = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R16G16B16A16_FLOAT, 1); - com_ptr texRGBA32F = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R32G32B32A32_FLOAT, 1); - + ComPtr texR8 = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R8_UNORM, 1); + ComPtr texR16F = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R16_FLOAT, 1); + ComPtr texR32F = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R32_FLOAT, 1); + ComPtr texRG8 = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R8G8_UNORM, 1); + ComPtr texRG16F = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R16G16_FLOAT, 1); + ComPtr texRG32F = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R32G32_FLOAT, 1); + ComPtr texRGBA8 = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R8G8B8A8_UNORM, 1); + ComPtr texRGBA16F = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R16G16B16A16_FLOAT, 1); + ComPtr texRGBA32F = dx.createTexture2d(uint2(32, 32), DXGI_FORMAT_R32G32B32A32_FLOAT, 1); + // Texture SRVs - com_ptr texSRV_R8 = dx.createSRV(texR8); - com_ptr texSRV_R16F = dx.createSRV(texR16F); - com_ptr texSRV_R32F = dx.createSRV(texR32F); - com_ptr texSRV_RG8 = dx.createSRV(texRG8); - com_ptr texSRV_RG16F = dx.createSRV(texRG16F); - com_ptr texSRV_RG32F = dx.createSRV(texRG32F); - com_ptr texSRV_RGBA8 = dx.createSRV(texRGBA8); - com_ptr texSRV_RGBA16F = dx.createSRV(texRGBA16F); - com_ptr texSRV_RGBA32F = dx.createSRV(texRGBA32F); + ShaderResourceView texSRV_R8 = dx.createSRV(texR8.Get()); + ShaderResourceView texSRV_R16F = dx.createSRV(texR16F.Get()); + ShaderResourceView texSRV_R32F = dx.createSRV(texR32F.Get()); + ShaderResourceView texSRV_RG8 = dx.createSRV(texRG8.Get()); + ShaderResourceView texSRV_RG16F = dx.createSRV(texRG16F.Get()); + ShaderResourceView texSRV_RG32F = dx.createSRV(texRG32F.Get()); + ShaderResourceView texSRV_RGBA8 = dx.createSRV(texRGBA8.Get()); + 
ShaderResourceView texSRV_RGBA16F = dx.createSRV(texRGBA16F.Get()); + ShaderResourceView texSRV_RGBA32F = dx.createSRV(texRGBA32F.Get()); // Samplers - com_ptr samplerNearest = dx.createSampler(DirectXDevice::SamplerType::Nearest); - com_ptr samplerBilinear = dx.createSampler(DirectXDevice::SamplerType::Bilinear); - com_ptr samplerTrilinear = dx.createSampler(DirectXDevice::SamplerType::Trilinear); + SamplerState samplerNearest = dx.createSampler(DirectXDevice::SamplerType::Nearest); + SamplerState samplerBilinear = dx.createSampler(DirectXDevice::SamplerType::Bilinear); + SamplerState samplerTrilinear = dx.createSampler(DirectXDevice::SamplerType::Trilinear); // Setup the constant buffer LoadConstants loadConstants; - com_ptr loadCB = dx.createConstantBuffer(sizeof(LoadConstants)); - com_ptr loadCBUnaligned = dx.createConstantBuffer(sizeof(LoadConstants)); + ComPtr loadCB = dx.createConstantBuffer(sizeof(LoadConstants)); + ComPtr loadCBUnaligned = dx.createConstantBuffer(sizeof(LoadConstants)); loadConstants.elementsMask = 0; // Dummy mask to prevent unwanted compiler optimizations loadConstants.writeIndex = 0xffffffff; // Never write loadConstants.readStartAddress = 0; // Aligned - dx.updateConstantBuffer(loadCB, loadConstants); + dx.updateConstantBuffer(loadCB.Get(), loadConstants); loadConstants.readStartAddress = 4; // Unaligned - dx.updateConstantBuffer(loadCBUnaligned, loadConstants); + dx.updateConstantBuffer(loadCBUnaligned.Get(), loadConstants); // Setup constant buffer with float4 array for constant buffer load benchmarking LoadConstantsWithArray loadConstantsWithArray; - com_ptr loadWithArrayCB = dx.createConstantBuffer(sizeof(LoadConstantsWithArray)); + ComPtr loadWithArrayCB = dx.createConstantBuffer(sizeof(LoadConstantsWithArray)); loadConstantsWithArray.elementsMask = 0; // Dummy mask to prevent unwanted compiler optimizations loadConstantsWithArray.writeIndex = 0xffffffff; // Never write loadConstantsWithArray.readStartAddress = 0; // Aligned 
memset(loadConstantsWithArray.benchmarkArray, 0, sizeof(loadConstantsWithArray.benchmarkArray)); - dx.updateConstantBuffer(loadWithArrayCB, loadConstantsWithArray); + dx.updateConstantBuffer(loadWithArrayCB.Get(), loadConstantsWithArray); const unsigned numWarmUpFramesBeforeBenchmark = 30; const unsigned numBenchmarkFrames = 30; @@ -232,161 +233,163 @@ int main(int argc, char *argv[]) } }); + dx.beginFrame(); + BenchTest bench(dx, outputUAV); - bench.testCase(shaderLoadTyped1dInvariant, loadCB, typedSRV_R8, "Buffer.Load uniform"); - bench.testCase(shaderLoadTyped1dLinear, loadCB, typedSRV_R8, "Buffer.Load linear"); - bench.testCase(shaderLoadTyped1dRandom, loadCB, typedSRV_R8, "Buffer.Load random"); - bench.testCase(shaderLoadTyped2dInvariant, loadCB, typedSRV_RG8, "Buffer.Load uniform"); - bench.testCase(shaderLoadTyped2dLinear, loadCB, typedSRV_RG8, "Buffer.Load linear"); - bench.testCase(shaderLoadTyped2dRandom, loadCB, typedSRV_RG8, "Buffer.Load random"); - bench.testCase(shaderLoadTyped4dInvariant, loadCB, typedSRV_RGBA8, "Buffer.Load uniform"); - bench.testCase(shaderLoadTyped4dLinear, loadCB, typedSRV_RGBA8, "Buffer.Load linear"); - bench.testCase(shaderLoadTyped4dRandom, loadCB, typedSRV_RGBA8, "Buffer.Load random"); - - bench.testCase(shaderLoadTyped1dInvariant, loadCB, typedSRV_R16F, "Buffer.Load uniform"); - bench.testCase(shaderLoadTyped1dLinear, loadCB, typedSRV_R16F, "Buffer.Load linear"); - bench.testCase(shaderLoadTyped1dRandom, loadCB, typedSRV_R16F, "Buffer.Load random"); - bench.testCase(shaderLoadTyped2dInvariant, loadCB, typedSRV_RG16F, "Buffer.Load uniform"); - bench.testCase(shaderLoadTyped2dLinear, loadCB, typedSRV_RG16F, "Buffer.Load linear"); - bench.testCase(shaderLoadTyped2dRandom, loadCB, typedSRV_RG16F, "Buffer.Load random"); - bench.testCase(shaderLoadTyped4dInvariant, loadCB, typedSRV_RGBA16F, "Buffer.Load uniform"); - bench.testCase(shaderLoadTyped4dLinear, loadCB, typedSRV_RGBA16F, "Buffer.Load linear"); - 
bench.testCase(shaderLoadTyped4dRandom, loadCB, typedSRV_RGBA16F, "Buffer.Load random"); - - bench.testCase(shaderLoadTyped1dInvariant, loadCB, typedSRV_R32F, "Buffer.Load uniform"); - bench.testCase(shaderLoadTyped1dLinear, loadCB, typedSRV_R32F, "Buffer.Load linear"); - bench.testCase(shaderLoadTyped1dRandom, loadCB, typedSRV_R32F, "Buffer.Load random"); - bench.testCase(shaderLoadTyped2dInvariant, loadCB, typedSRV_RG32F, "Buffer.Load uniform"); - bench.testCase(shaderLoadTyped2dLinear, loadCB, typedSRV_RG32F, "Buffer.Load linear"); - bench.testCase(shaderLoadTyped2dRandom, loadCB, typedSRV_RG32F, "Buffer.Load random"); - bench.testCase(shaderLoadTyped4dInvariant, loadCB, typedSRV_RGBA32F, "Buffer.Load uniform"); - bench.testCase(shaderLoadTyped4dLinear, loadCB, typedSRV_RGBA32F, "Buffer.Load linear"); - bench.testCase(shaderLoadTyped4dRandom, loadCB, typedSRV_RGBA32F, "Buffer.Load random"); - - bench.testCase(shaderLoadRaw1dInvariant, loadCB, byteAddressSRV, "ByteAddressBuffer.Load uniform"); - bench.testCase(shaderLoadRaw1dLinear, loadCB, byteAddressSRV, "ByteAddressBuffer.Load linear"); - bench.testCase(shaderLoadRaw1dRandom, loadCB, byteAddressSRV, "ByteAddressBuffer.Load random"); - bench.testCase(shaderLoadRaw2dInvariant, loadCB, byteAddressSRV, "ByteAddressBuffer.Load2 uniform"); - bench.testCase(shaderLoadRaw2dLinear, loadCB, byteAddressSRV, "ByteAddressBuffer.Load2 linear"); - bench.testCase(shaderLoadRaw2dRandom, loadCB, byteAddressSRV, "ByteAddressBuffer.Load2 random"); - bench.testCase(shaderLoadRaw3dInvariant, loadCB, byteAddressSRV, "ByteAddressBuffer.Load3 uniform"); - bench.testCase(shaderLoadRaw3dLinear, loadCB, byteAddressSRV, "ByteAddressBuffer.Load3 linear"); - bench.testCase(shaderLoadRaw3dRandom, loadCB, byteAddressSRV, "ByteAddressBuffer.Load3 random"); - bench.testCase(shaderLoadRaw4dInvariant, loadCB, byteAddressSRV, "ByteAddressBuffer.Load4 uniform"); - bench.testCase(shaderLoadRaw4dLinear, loadCB, byteAddressSRV, 
"ByteAddressBuffer.Load4 linear"); - bench.testCase(shaderLoadRaw4dRandom, loadCB, byteAddressSRV, "ByteAddressBuffer.Load4 random"); - - bench.testCase(shaderLoadRaw2dInvariant, loadCBUnaligned, byteAddressSRV, "ByteAddressBuffer.Load2 unaligned uniform"); - bench.testCase(shaderLoadRaw2dLinear, loadCBUnaligned, byteAddressSRV, "ByteAddressBuffer.Load2 unaligned linear"); - bench.testCase(shaderLoadRaw2dRandom, loadCBUnaligned, byteAddressSRV, "ByteAddressBuffer.Load2 unaligned random"); - bench.testCase(shaderLoadRaw4dInvariant, loadCBUnaligned, byteAddressSRV, "ByteAddressBuffer.Load4 unaligned uniform"); - bench.testCase(shaderLoadRaw4dLinear, loadCBUnaligned, byteAddressSRV, "ByteAddressBuffer.Load4 unaligned linear"); - bench.testCase(shaderLoadRaw4dRandom, loadCBUnaligned, byteAddressSRV, "ByteAddressBuffer.Load4 unaligned random"); - - bench.testCase(shaderLoadStructured1dInvariant, loadCB, structuredSRV_R32F, "StructuredBuffer.Load uniform"); - bench.testCase(shaderLoadStructured1dLinear, loadCB, structuredSRV_R32F, "StructuredBuffer.Load linear"); - bench.testCase(shaderLoadStructured1dRandom, loadCB, structuredSRV_R32F, "StructuredBuffer.Load random"); - bench.testCase(shaderLoadStructured2dInvariant, loadCB, structuredSRV_RG32F, "StructuredBuffer.Load uniform"); - bench.testCase(shaderLoadStructured2dLinear, loadCB, structuredSRV_RG32F, "StructuredBuffer.Load linear"); - bench.testCase(shaderLoadStructured2dRandom, loadCB, structuredSRV_RG32F, "StructuredBuffer.Load random"); - bench.testCase(shaderLoadStructured4dInvariant, loadCB, structuredSRV_RGBA32F, "StructuredBuffer.Load uniform"); - bench.testCase(shaderLoadStructured4dLinear, loadCB, structuredSRV_RGBA32F, "StructuredBuffer.Load linear"); - bench.testCase(shaderLoadStructured4dRandom, loadCB, structuredSRV_RGBA32F, "StructuredBuffer.Load random"); - - bench.testCase(shaderLoadConstant4dInvariant, loadWithArrayCB, nullptr, "cbuffer{float4} load uniform"); - 
bench.testCase(shaderLoadConstant4dLinear, loadWithArrayCB, nullptr, "cbuffer{float4} load linear"); - bench.testCase(shaderLoadConstant4dRandom, loadWithArrayCB, nullptr, "cbuffer{float4} load random"); - - bench.testCase(shaderLoadTex1dInvariant, loadCB, texSRV_R8, "Texture2D.Load uniform"); - bench.testCase(shaderLoadTex1dLinear, loadCB, texSRV_R8, "Texture2D.Load linear"); - bench.testCase(shaderLoadTex1dRandom, loadCB, texSRV_R8, "Texture2D.Load random"); - bench.testCase(shaderLoadTex2dInvariant, loadCB, texSRV_RG8, "Texture2D.Load uniform"); - bench.testCase(shaderLoadTex2dLinear, loadCB, texSRV_RG8, "Texture2D.Load linear"); - bench.testCase(shaderLoadTex2dRandom, loadCB, texSRV_RG8, "Texture2D.Load random"); - bench.testCase(shaderLoadTex4dInvariant, loadCB, texSRV_RGBA8, "Texture2D.Load uniform"); - bench.testCase(shaderLoadTex4dLinear, loadCB, texSRV_RGBA8, "Texture2D.Load linear"); - bench.testCase(shaderLoadTex4dRandom, loadCB, texSRV_RGBA8, "Texture2D.Load random"); - - bench.testCase(shaderLoadTex1dInvariant, loadCB, texSRV_R16F, "Texture2D.Load uniform"); - bench.testCase(shaderLoadTex1dLinear, loadCB, texSRV_R16F, "Texture2D.Load linear"); - bench.testCase(shaderLoadTex1dRandom, loadCB, texSRV_R16F, "Texture2D.Load random"); - bench.testCase(shaderLoadTex2dInvariant, loadCB, texSRV_RG16F, "Texture2D.Load uniform"); - bench.testCase(shaderLoadTex2dLinear, loadCB, texSRV_RG16F, "Texture2D.Load linear"); - bench.testCase(shaderLoadTex2dRandom, loadCB, texSRV_RG16F, "Texture2D.Load random"); - bench.testCase(shaderLoadTex4dInvariant, loadCB, texSRV_RGBA16F, "Texture2D.Load uniform"); - bench.testCase(shaderLoadTex4dLinear, loadCB, texSRV_RGBA16F, "Texture2D.Load linear"); - bench.testCase(shaderLoadTex4dRandom, loadCB, texSRV_RGBA16F, "Texture2D.Load random"); - - bench.testCase(shaderLoadTex1dInvariant, loadCB, texSRV_R32F, "Texture2D.Load uniform"); - bench.testCase(shaderLoadTex1dLinear, loadCB, texSRV_R32F, "Texture2D.Load linear"); - 
bench.testCase(shaderLoadTex1dRandom, loadCB, texSRV_R32F, "Texture2D.Load random"); - bench.testCase(shaderLoadTex2dInvariant, loadCB, texSRV_RG32F, "Texture2D.Load uniform"); - bench.testCase(shaderLoadTex2dLinear, loadCB, texSRV_RG32F, "Texture2D.Load linear"); - bench.testCase(shaderLoadTex2dRandom, loadCB, texSRV_RG32F, "Texture2D.Load random"); - bench.testCase(shaderLoadTex4dInvariant, loadCB, texSRV_RGBA32F, "Texture2D.Load uniform"); - bench.testCase(shaderLoadTex4dLinear, loadCB, texSRV_RGBA32F, "Texture2D.Load linear"); - bench.testCase(shaderLoadTex4dRandom, loadCB, texSRV_RGBA32F, "Texture2D.Load random"); - - bench.testCaseWithSampler(shaderSampleTex1dInvariant, loadCB, texSRV_R8, samplerNearest, "Texture2D.Sample(nearest) uniform"); - bench.testCaseWithSampler(shaderSampleTex1dLinear, loadCB, texSRV_R8, samplerNearest, "Texture2D.Sample(nearest) linear"); - bench.testCaseWithSampler(shaderSampleTex1dRandom, loadCB, texSRV_R8, samplerNearest, "Texture2D.Sample(nearest) random"); - bench.testCaseWithSampler(shaderSampleTex2dInvariant, loadCB, texSRV_RG8, samplerNearest, "Texture2D.Sample(nearest) uniform"); - bench.testCaseWithSampler(shaderSampleTex2dLinear, loadCB, texSRV_RG8, samplerNearest, "Texture2D.Sample(nearest) linear"); - bench.testCaseWithSampler(shaderSampleTex2dRandom, loadCB, texSRV_RG8, samplerNearest, "Texture2D.Sample(nearest) random"); - bench.testCaseWithSampler(shaderSampleTex4dInvariant, loadCB, texSRV_RGBA8, samplerNearest, "Texture2D.Sample(nearest) uniform"); - bench.testCaseWithSampler(shaderSampleTex4dLinear, loadCB, texSRV_RGBA8, samplerNearest, "Texture2D.Sample(nearest) linear"); - bench.testCaseWithSampler(shaderSampleTex4dRandom, loadCB, texSRV_RGBA8, samplerNearest, "Texture2D.Sample(nearest) random"); - - bench.testCaseWithSampler(shaderSampleTex1dInvariant, loadCB, texSRV_R16F, samplerNearest, "Texture2D.Sample(nearest) uniform"); - bench.testCaseWithSampler(shaderSampleTex1dLinear, loadCB, texSRV_R16F, 
samplerNearest, "Texture2D.Sample(nearest) linear"); - bench.testCaseWithSampler(shaderSampleTex1dRandom, loadCB, texSRV_R16F, samplerNearest, "Texture2D.Sample(nearest) random"); - bench.testCaseWithSampler(shaderSampleTex2dInvariant, loadCB, texSRV_RG16F, samplerNearest, "Texture2D.Sample(nearest) uniform"); - bench.testCaseWithSampler(shaderSampleTex2dLinear, loadCB, texSRV_RG16F, samplerNearest, "Texture2D.Sample(nearest) linear"); - bench.testCaseWithSampler(shaderSampleTex2dRandom, loadCB, texSRV_RG16F, samplerNearest, "Texture2D.Sample(nearest) random"); - bench.testCaseWithSampler(shaderSampleTex4dInvariant, loadCB, texSRV_RGBA16F, samplerNearest, "Texture2D.Sample(nearest) uniform"); - bench.testCaseWithSampler(shaderSampleTex4dLinear, loadCB, texSRV_RGBA16F, samplerNearest, "Texture2D.Sample(nearest) linear"); - bench.testCaseWithSampler(shaderSampleTex4dRandom, loadCB, texSRV_RGBA16F, samplerNearest, "Texture2D.Sample(nearest) random"); - - bench.testCaseWithSampler(shaderSampleTex1dInvariant, loadCB, texSRV_R32F, samplerNearest, "Texture2D.Sample(nearest) uniform"); - bench.testCaseWithSampler(shaderSampleTex1dLinear, loadCB, texSRV_R32F, samplerNearest, "Texture2D.Sample(nearest) linear"); - bench.testCaseWithSampler(shaderSampleTex1dRandom, loadCB, texSRV_R32F, samplerNearest, "Texture2D.Sample(nearest) random"); - bench.testCaseWithSampler(shaderSampleTex2dInvariant, loadCB, texSRV_RG32F, samplerNearest, "Texture2D.Sample(nearest) uniform"); - bench.testCaseWithSampler(shaderSampleTex2dLinear, loadCB, texSRV_RG32F, samplerNearest, "Texture2D.Sample(nearest) linear"); - bench.testCaseWithSampler(shaderSampleTex2dRandom, loadCB, texSRV_RG32F, samplerNearest, "Texture2D.Sample(nearest) random"); - bench.testCaseWithSampler(shaderSampleTex4dInvariant, loadCB, texSRV_RGBA32F, samplerNearest, "Texture2D.Sample(bilinear) uniform"); - bench.testCaseWithSampler(shaderSampleTex4dLinear, loadCB, texSRV_RGBA32F, samplerNearest, "Texture2D.Sample(nearest) 
linear"); - bench.testCaseWithSampler(shaderSampleTex4dRandom, loadCB, texSRV_RGBA32F, samplerNearest, "Texture2D.Sample(nearest) random"); - - bench.testCaseWithSampler(shaderSampleTex1dInvariant, loadCB, texSRV_R8, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); - bench.testCaseWithSampler(shaderSampleTex1dLinear, loadCB, texSRV_R8, samplerBilinear, "Texture2D.Sample(bilinear) linear"); - bench.testCaseWithSampler(shaderSampleTex1dRandom, loadCB, texSRV_R8, samplerBilinear, "Texture2D.Sample(bilinear) random"); - bench.testCaseWithSampler(shaderSampleTex2dInvariant, loadCB, texSRV_RG8, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); - bench.testCaseWithSampler(shaderSampleTex2dLinear, loadCB, texSRV_RG8, samplerBilinear, "Texture2D.Sample(bilinear) linear"); - bench.testCaseWithSampler(shaderSampleTex2dRandom, loadCB, texSRV_RG8, samplerBilinear, "Texture2D.Sample(bilinear) random"); - bench.testCaseWithSampler(shaderSampleTex4dInvariant, loadCB, texSRV_RGBA8, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); - bench.testCaseWithSampler(shaderSampleTex4dLinear, loadCB, texSRV_RGBA8, samplerBilinear, "Texture2D.Sample(bilinear) linear"); - bench.testCaseWithSampler(shaderSampleTex4dRandom, loadCB, texSRV_RGBA8, samplerBilinear, "Texture2D.Sample(bilinear) random"); - - bench.testCaseWithSampler(shaderSampleTex1dInvariant, loadCB, texSRV_R16F, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); - bench.testCaseWithSampler(shaderSampleTex1dLinear, loadCB, texSRV_R16F, samplerBilinear, "Texture2D.Sample(bilinear) linear"); - bench.testCaseWithSampler(shaderSampleTex1dRandom, loadCB, texSRV_R16F, samplerBilinear, "Texture2D.Sample(bilinear) random"); - bench.testCaseWithSampler(shaderSampleTex2dInvariant, loadCB, texSRV_RG16F, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); - bench.testCaseWithSampler(shaderSampleTex2dLinear, loadCB, texSRV_RG16F, samplerBilinear, "Texture2D.Sample(bilinear) linear"); - 
bench.testCaseWithSampler(shaderSampleTex2dRandom, loadCB, texSRV_RG16F, samplerBilinear, "Texture2D.Sample(bilinear) random"); - bench.testCaseWithSampler(shaderSampleTex4dInvariant, loadCB, texSRV_RGBA16F, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); - bench.testCaseWithSampler(shaderSampleTex4dLinear, loadCB, texSRV_RGBA16F, samplerBilinear, "Texture2D.Sample(bilinear) linear"); - bench.testCaseWithSampler(shaderSampleTex4dRandom, loadCB, texSRV_RGBA16F, samplerBilinear, "Texture2D.Sample(bilinear) random"); - - bench.testCaseWithSampler(shaderSampleTex1dInvariant, loadCB, texSRV_R32F, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); - bench.testCaseWithSampler(shaderSampleTex1dLinear, loadCB, texSRV_R32F, samplerBilinear, "Texture2D.Sample(bilinear) linear"); - bench.testCaseWithSampler(shaderSampleTex1dRandom, loadCB, texSRV_R32F, samplerBilinear, "Texture2D.Sample(bilinear) random"); - bench.testCaseWithSampler(shaderSampleTex2dInvariant, loadCB, texSRV_RG32F, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); - bench.testCaseWithSampler(shaderSampleTex2dLinear, loadCB, texSRV_RG32F, samplerBilinear, "Texture2D.Sample(bilinear) linear"); - bench.testCaseWithSampler(shaderSampleTex2dRandom, loadCB, texSRV_RG32F, samplerBilinear, "Texture2D.Sample(bilinear) random"); - bench.testCaseWithSampler(shaderSampleTex4dInvariant, loadCB, texSRV_RGBA32F, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); - bench.testCaseWithSampler(shaderSampleTex4dLinear, loadCB, texSRV_RGBA32F, samplerBilinear, "Texture2D.Sample(bilinear) linear"); - bench.testCaseWithSampler(shaderSampleTex4dRandom, loadCB, texSRV_RGBA32F, samplerBilinear, "Texture2D.Sample(bilinear) random"); + bench.testCase(shaderLoadTyped1dInvariant, loadCB.Get(), typedSRV_R8, "Buffer.Load uniform"); + bench.testCase(shaderLoadTyped1dLinear, loadCB.Get(), typedSRV_R8, "Buffer.Load linear"); + bench.testCase(shaderLoadTyped1dRandom, loadCB.Get(), typedSRV_R8, "Buffer.Load random"); + 
bench.testCase(shaderLoadTyped2dInvariant, loadCB.Get(), typedSRV_RG8, "Buffer.Load uniform"); + bench.testCase(shaderLoadTyped2dLinear, loadCB.Get(), typedSRV_RG8, "Buffer.Load linear"); + bench.testCase(shaderLoadTyped2dRandom, loadCB.Get(), typedSRV_RG8, "Buffer.Load random"); + bench.testCase(shaderLoadTyped4dInvariant, loadCB.Get(), typedSRV_RGBA8, "Buffer.Load uniform"); + bench.testCase(shaderLoadTyped4dLinear, loadCB.Get(), typedSRV_RGBA8, "Buffer.Load linear"); + bench.testCase(shaderLoadTyped4dRandom, loadCB.Get(), typedSRV_RGBA8, "Buffer.Load random"); + + bench.testCase(shaderLoadTyped1dInvariant, loadCB.Get(), typedSRV_R16F, "Buffer.Load uniform"); + bench.testCase(shaderLoadTyped1dLinear, loadCB.Get(), typedSRV_R16F, "Buffer.Load linear"); + bench.testCase(shaderLoadTyped1dRandom, loadCB.Get(), typedSRV_R16F, "Buffer.Load random"); + bench.testCase(shaderLoadTyped2dInvariant, loadCB.Get(), typedSRV_RG16F, "Buffer.Load uniform"); + bench.testCase(shaderLoadTyped2dLinear, loadCB.Get(), typedSRV_RG16F, "Buffer.Load linear"); + bench.testCase(shaderLoadTyped2dRandom, loadCB.Get(), typedSRV_RG16F, "Buffer.Load random"); + bench.testCase(shaderLoadTyped4dInvariant, loadCB.Get(), typedSRV_RGBA16F, "Buffer.Load uniform"); + bench.testCase(shaderLoadTyped4dLinear, loadCB.Get(), typedSRV_RGBA16F, "Buffer.Load linear"); + bench.testCase(shaderLoadTyped4dRandom, loadCB.Get(), typedSRV_RGBA16F, "Buffer.Load random"); + + bench.testCase(shaderLoadTyped1dInvariant, loadCB.Get(), typedSRV_R32F, "Buffer.Load uniform"); + bench.testCase(shaderLoadTyped1dLinear, loadCB.Get(), typedSRV_R32F, "Buffer.Load linear"); + bench.testCase(shaderLoadTyped1dRandom, loadCB.Get(), typedSRV_R32F, "Buffer.Load random"); + bench.testCase(shaderLoadTyped2dInvariant, loadCB.Get(), typedSRV_RG32F, "Buffer.Load uniform"); + bench.testCase(shaderLoadTyped2dLinear, loadCB.Get(), typedSRV_RG32F, "Buffer.Load linear"); + bench.testCase(shaderLoadTyped2dRandom, loadCB.Get(), typedSRV_RG32F, 
"Buffer.Load random"); + bench.testCase(shaderLoadTyped4dInvariant, loadCB.Get(), typedSRV_RGBA32F, "Buffer.Load uniform"); + bench.testCase(shaderLoadTyped4dLinear, loadCB.Get(), typedSRV_RGBA32F, "Buffer.Load linear"); + bench.testCase(shaderLoadTyped4dRandom, loadCB.Get(), typedSRV_RGBA32F, "Buffer.Load random"); + + bench.testCase(shaderLoadRaw1dInvariant, loadCB.Get(), byteAddressSRV, "ByteAddressBuffer.Load uniform"); + bench.testCase(shaderLoadRaw1dLinear, loadCB.Get(), byteAddressSRV, "ByteAddressBuffer.Load linear"); + bench.testCase(shaderLoadRaw1dRandom, loadCB.Get(), byteAddressSRV, "ByteAddressBuffer.Load random"); + bench.testCase(shaderLoadRaw2dInvariant, loadCB.Get(), byteAddressSRV, "ByteAddressBuffer.Load2 uniform"); + bench.testCase(shaderLoadRaw2dLinear, loadCB.Get(), byteAddressSRV, "ByteAddressBuffer.Load2 linear"); + bench.testCase(shaderLoadRaw2dRandom, loadCB.Get(), byteAddressSRV, "ByteAddressBuffer.Load2 random"); + bench.testCase(shaderLoadRaw3dInvariant, loadCB.Get(), byteAddressSRV, "ByteAddressBuffer.Load3 uniform"); + bench.testCase(shaderLoadRaw3dLinear, loadCB.Get(), byteAddressSRV, "ByteAddressBuffer.Load3 linear"); + bench.testCase(shaderLoadRaw3dRandom, loadCB.Get(), byteAddressSRV, "ByteAddressBuffer.Load3 random"); + bench.testCase(shaderLoadRaw4dInvariant, loadCB.Get(), byteAddressSRV, "ByteAddressBuffer.Load4 uniform"); + bench.testCase(shaderLoadRaw4dLinear, loadCB.Get(), byteAddressSRV, "ByteAddressBuffer.Load4 linear"); + bench.testCase(shaderLoadRaw4dRandom, loadCB.Get(), byteAddressSRV, "ByteAddressBuffer.Load4 random"); + + bench.testCase(shaderLoadRaw2dInvariant, loadCBUnaligned.Get(), byteAddressSRV, "ByteAddressBuffer.Load2 unaligned uniform"); + bench.testCase(shaderLoadRaw2dLinear, loadCBUnaligned.Get(), byteAddressSRV, "ByteAddressBuffer.Load2 unaligned linear"); + bench.testCase(shaderLoadRaw2dRandom, loadCBUnaligned.Get(), byteAddressSRV, "ByteAddressBuffer.Load2 unaligned random"); + 
bench.testCase(shaderLoadRaw4dInvariant, loadCBUnaligned.Get(), byteAddressSRV, "ByteAddressBuffer.Load4 unaligned uniform"); + bench.testCase(shaderLoadRaw4dLinear, loadCBUnaligned.Get(), byteAddressSRV, "ByteAddressBuffer.Load4 unaligned linear"); + bench.testCase(shaderLoadRaw4dRandom, loadCBUnaligned.Get(), byteAddressSRV, "ByteAddressBuffer.Load4 unaligned random"); + + bench.testCase(shaderLoadStructured1dInvariant, loadCB.Get(), structuredSRV_R32F, "StructuredBuffer.Load uniform"); + bench.testCase(shaderLoadStructured1dLinear, loadCB.Get(), structuredSRV_R32F, "StructuredBuffer.Load linear"); + bench.testCase(shaderLoadStructured1dRandom, loadCB.Get(), structuredSRV_R32F, "StructuredBuffer.Load random"); + bench.testCase(shaderLoadStructured2dInvariant, loadCB.Get(), structuredSRV_RG32F, "StructuredBuffer.Load uniform"); + bench.testCase(shaderLoadStructured2dLinear, loadCB.Get(), structuredSRV_RG32F, "StructuredBuffer.Load linear"); + bench.testCase(shaderLoadStructured2dRandom, loadCB.Get(), structuredSRV_RG32F, "StructuredBuffer.Load random"); + bench.testCase(shaderLoadStructured4dInvariant, loadCB.Get(), structuredSRV_RGBA32F, "StructuredBuffer.Load uniform"); + bench.testCase(shaderLoadStructured4dLinear, loadCB.Get(), structuredSRV_RGBA32F, "StructuredBuffer.Load linear"); + bench.testCase(shaderLoadStructured4dRandom, loadCB.Get(), structuredSRV_RGBA32F, "StructuredBuffer.Load random"); + + bench.testCase(shaderLoadConstant4dInvariant, loadWithArrayCB.Get(), {}, "cbuffer{float4} load uniform"); + bench.testCase(shaderLoadConstant4dLinear, loadWithArrayCB.Get(), {}, "cbuffer{float4} load linear"); + bench.testCase(shaderLoadConstant4dRandom, loadWithArrayCB.Get(), {}, "cbuffer{float4} load random"); + + bench.testCase(shaderLoadTex1dInvariant, loadCB.Get(), texSRV_R8, "Texture2D.Load uniform"); + bench.testCase(shaderLoadTex1dLinear, loadCB.Get(), texSRV_R8, "Texture2D.Load linear"); + bench.testCase(shaderLoadTex1dRandom, loadCB.Get(), texSRV_R8, 
"Texture2D.Load random"); + bench.testCase(shaderLoadTex2dInvariant, loadCB.Get(), texSRV_RG8, "Texture2D.Load uniform"); + bench.testCase(shaderLoadTex2dLinear, loadCB.Get(), texSRV_RG8, "Texture2D.Load linear"); + bench.testCase(shaderLoadTex2dRandom, loadCB.Get(), texSRV_RG8, "Texture2D.Load random"); + bench.testCase(shaderLoadTex4dInvariant, loadCB.Get(), texSRV_RGBA8, "Texture2D.Load uniform"); + bench.testCase(shaderLoadTex4dLinear, loadCB.Get(), texSRV_RGBA8, "Texture2D.Load linear"); + bench.testCase(shaderLoadTex4dRandom, loadCB.Get(), texSRV_RGBA8, "Texture2D.Load random"); + + bench.testCase(shaderLoadTex1dInvariant, loadCB.Get(), texSRV_R16F, "Texture2D.Load uniform"); + bench.testCase(shaderLoadTex1dLinear, loadCB.Get(), texSRV_R16F, "Texture2D.Load linear"); + bench.testCase(shaderLoadTex1dRandom, loadCB.Get(), texSRV_R16F, "Texture2D.Load random"); + bench.testCase(shaderLoadTex2dInvariant, loadCB.Get(), texSRV_RG16F, "Texture2D.Load uniform"); + bench.testCase(shaderLoadTex2dLinear, loadCB.Get(), texSRV_RG16F, "Texture2D.Load linear"); + bench.testCase(shaderLoadTex2dRandom, loadCB.Get(), texSRV_RG16F, "Texture2D.Load random"); + bench.testCase(shaderLoadTex4dInvariant, loadCB.Get(), texSRV_RGBA16F, "Texture2D.Load uniform"); + bench.testCase(shaderLoadTex4dLinear, loadCB.Get(), texSRV_RGBA16F, "Texture2D.Load linear"); + bench.testCase(shaderLoadTex4dRandom, loadCB.Get(), texSRV_RGBA16F, "Texture2D.Load random"); + + bench.testCase(shaderLoadTex1dInvariant, loadCB.Get(), texSRV_R32F, "Texture2D.Load uniform"); + bench.testCase(shaderLoadTex1dLinear, loadCB.Get(), texSRV_R32F, "Texture2D.Load linear"); + bench.testCase(shaderLoadTex1dRandom, loadCB.Get(), texSRV_R32F, "Texture2D.Load random"); + bench.testCase(shaderLoadTex2dInvariant, loadCB.Get(), texSRV_RG32F, "Texture2D.Load uniform"); + bench.testCase(shaderLoadTex2dLinear, loadCB.Get(), texSRV_RG32F, "Texture2D.Load linear"); + bench.testCase(shaderLoadTex2dRandom, loadCB.Get(), texSRV_RG32F, 
"Texture2D.Load random"); + bench.testCase(shaderLoadTex4dInvariant, loadCB.Get(), texSRV_RGBA32F, "Texture2D.Load uniform"); + bench.testCase(shaderLoadTex4dLinear, loadCB.Get(), texSRV_RGBA32F, "Texture2D.Load linear"); + bench.testCase(shaderLoadTex4dRandom, loadCB.Get(), texSRV_RGBA32F, "Texture2D.Load random"); + + bench.testCaseWithSampler(shaderSampleTex1dInvariant, loadCB.Get(), texSRV_R8, samplerNearest, "Texture2D.Sample(nearest) uniform"); + bench.testCaseWithSampler(shaderSampleTex1dLinear, loadCB.Get(), texSRV_R8, samplerNearest, "Texture2D.Sample(nearest) linear"); + bench.testCaseWithSampler(shaderSampleTex1dRandom, loadCB.Get(), texSRV_R8, samplerNearest, "Texture2D.Sample(nearest) random"); + bench.testCaseWithSampler(shaderSampleTex2dInvariant, loadCB.Get(), texSRV_RG8, samplerNearest, "Texture2D.Sample(nearest) uniform"); + bench.testCaseWithSampler(shaderSampleTex2dLinear, loadCB.Get(), texSRV_RG8, samplerNearest, "Texture2D.Sample(nearest) linear"); + bench.testCaseWithSampler(shaderSampleTex2dRandom, loadCB.Get(), texSRV_RG8, samplerNearest, "Texture2D.Sample(nearest) random"); + bench.testCaseWithSampler(shaderSampleTex4dInvariant, loadCB.Get(), texSRV_RGBA8, samplerNearest, "Texture2D.Sample(nearest) uniform"); + bench.testCaseWithSampler(shaderSampleTex4dLinear, loadCB.Get(), texSRV_RGBA8, samplerNearest, "Texture2D.Sample(nearest) linear"); + bench.testCaseWithSampler(shaderSampleTex4dRandom, loadCB.Get(), texSRV_RGBA8, samplerNearest, "Texture2D.Sample(nearest) random"); + + bench.testCaseWithSampler(shaderSampleTex1dInvariant, loadCB.Get(), texSRV_R16F, samplerNearest, "Texture2D.Sample(nearest) uniform"); + bench.testCaseWithSampler(shaderSampleTex1dLinear, loadCB.Get(), texSRV_R16F, samplerNearest, "Texture2D.Sample(nearest) linear"); + bench.testCaseWithSampler(shaderSampleTex1dRandom, loadCB.Get(), texSRV_R16F, samplerNearest, "Texture2D.Sample(nearest) random"); + bench.testCaseWithSampler(shaderSampleTex2dInvariant, loadCB.Get(), 
texSRV_RG16F, samplerNearest, "Texture2D.Sample(nearest) uniform"); + bench.testCaseWithSampler(shaderSampleTex2dLinear, loadCB.Get(), texSRV_RG16F, samplerNearest, "Texture2D.Sample(nearest) linear"); + bench.testCaseWithSampler(shaderSampleTex2dRandom, loadCB.Get(), texSRV_RG16F, samplerNearest, "Texture2D.Sample(nearest) random"); + bench.testCaseWithSampler(shaderSampleTex4dInvariant, loadCB.Get(), texSRV_RGBA16F, samplerNearest, "Texture2D.Sample(nearest) uniform"); + bench.testCaseWithSampler(shaderSampleTex4dLinear, loadCB.Get(), texSRV_RGBA16F, samplerNearest, "Texture2D.Sample(nearest) linear"); + bench.testCaseWithSampler(shaderSampleTex4dRandom, loadCB.Get(), texSRV_RGBA16F, samplerNearest, "Texture2D.Sample(nearest) random"); + + bench.testCaseWithSampler(shaderSampleTex1dInvariant, loadCB.Get(), texSRV_R32F, samplerNearest, "Texture2D.Sample(nearest) uniform"); + bench.testCaseWithSampler(shaderSampleTex1dLinear, loadCB.Get(), texSRV_R32F, samplerNearest, "Texture2D.Sample(nearest) linear"); + bench.testCaseWithSampler(shaderSampleTex1dRandom, loadCB.Get(), texSRV_R32F, samplerNearest, "Texture2D.Sample(nearest) random"); + bench.testCaseWithSampler(shaderSampleTex2dInvariant, loadCB.Get(), texSRV_RG32F, samplerNearest, "Texture2D.Sample(nearest) uniform"); + bench.testCaseWithSampler(shaderSampleTex2dLinear, loadCB.Get(), texSRV_RG32F, samplerNearest, "Texture2D.Sample(nearest) linear"); + bench.testCaseWithSampler(shaderSampleTex2dRandom, loadCB.Get(), texSRV_RG32F, samplerNearest, "Texture2D.Sample(nearest) random"); + bench.testCaseWithSampler(shaderSampleTex4dInvariant, loadCB.Get(), texSRV_RGBA32F, samplerNearest, "Texture2D.Sample(nearest) uniform"); + bench.testCaseWithSampler(shaderSampleTex4dLinear, loadCB.Get(), texSRV_RGBA32F, samplerNearest, "Texture2D.Sample(nearest) linear"); + bench.testCaseWithSampler(shaderSampleTex4dRandom, loadCB.Get(), texSRV_RGBA32F, samplerNearest, "Texture2D.Sample(nearest) random"); +
bench.testCaseWithSampler(shaderSampleTex1dInvariant, loadCB.Get(), texSRV_R8, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); + bench.testCaseWithSampler(shaderSampleTex1dLinear, loadCB.Get(), texSRV_R8, samplerBilinear, "Texture2D.Sample(bilinear) linear"); + bench.testCaseWithSampler(shaderSampleTex1dRandom, loadCB.Get(), texSRV_R8, samplerBilinear, "Texture2D.Sample(bilinear) random"); + bench.testCaseWithSampler(shaderSampleTex2dInvariant, loadCB.Get(), texSRV_RG8, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); + bench.testCaseWithSampler(shaderSampleTex2dLinear, loadCB.Get(), texSRV_RG8, samplerBilinear, "Texture2D.Sample(bilinear) linear"); + bench.testCaseWithSampler(shaderSampleTex2dRandom, loadCB.Get(), texSRV_RG8, samplerBilinear, "Texture2D.Sample(bilinear) random"); + bench.testCaseWithSampler(shaderSampleTex4dInvariant, loadCB.Get(), texSRV_RGBA8, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); + bench.testCaseWithSampler(shaderSampleTex4dLinear, loadCB.Get(), texSRV_RGBA8, samplerBilinear, "Texture2D.Sample(bilinear) linear"); + bench.testCaseWithSampler(shaderSampleTex4dRandom, loadCB.Get(), texSRV_RGBA8, samplerBilinear, "Texture2D.Sample(bilinear) random"); + + bench.testCaseWithSampler(shaderSampleTex1dInvariant, loadCB.Get(), texSRV_R16F, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); + bench.testCaseWithSampler(shaderSampleTex1dLinear, loadCB.Get(), texSRV_R16F, samplerBilinear, "Texture2D.Sample(bilinear) linear"); + bench.testCaseWithSampler(shaderSampleTex1dRandom, loadCB.Get(), texSRV_R16F, samplerBilinear, "Texture2D.Sample(bilinear) random"); + bench.testCaseWithSampler(shaderSampleTex2dInvariant, loadCB.Get(), texSRV_RG16F, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); + bench.testCaseWithSampler(shaderSampleTex2dLinear, loadCB.Get(), texSRV_RG16F, samplerBilinear, "Texture2D.Sample(bilinear) linear"); + bench.testCaseWithSampler(shaderSampleTex2dRandom, loadCB.Get(), texSRV_RG16F, 
samplerBilinear, "Texture2D.Sample(bilinear) random"); + bench.testCaseWithSampler(shaderSampleTex4dInvariant, loadCB.Get(), texSRV_RGBA16F, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); + bench.testCaseWithSampler(shaderSampleTex4dLinear, loadCB.Get(), texSRV_RGBA16F, samplerBilinear, "Texture2D.Sample(bilinear) linear"); + bench.testCaseWithSampler(shaderSampleTex4dRandom, loadCB.Get(), texSRV_RGBA16F, samplerBilinear, "Texture2D.Sample(bilinear) random"); + + bench.testCaseWithSampler(shaderSampleTex1dInvariant, loadCB.Get(), texSRV_R32F, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); + bench.testCaseWithSampler(shaderSampleTex1dLinear, loadCB.Get(), texSRV_R32F, samplerBilinear, "Texture2D.Sample(bilinear) linear"); + bench.testCaseWithSampler(shaderSampleTex1dRandom, loadCB.Get(), texSRV_R32F, samplerBilinear, "Texture2D.Sample(bilinear) random"); + bench.testCaseWithSampler(shaderSampleTex2dInvariant, loadCB.Get(), texSRV_RG32F, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); + bench.testCaseWithSampler(shaderSampleTex2dLinear, loadCB.Get(), texSRV_RG32F, samplerBilinear, "Texture2D.Sample(bilinear) linear"); + bench.testCaseWithSampler(shaderSampleTex2dRandom, loadCB.Get(), texSRV_RG32F, samplerBilinear, "Texture2D.Sample(bilinear) random"); + bench.testCaseWithSampler(shaderSampleTex4dInvariant, loadCB.Get(), texSRV_RGBA32F, samplerBilinear, "Texture2D.Sample(bilinear) uniform"); + bench.testCaseWithSampler(shaderSampleTex4dLinear, loadCB.Get(), texSRV_RGBA32F, samplerBilinear, "Texture2D.Sample(bilinear) linear"); + bench.testCaseWithSampler(shaderSampleTex4dRandom, loadCB.Get(), texSRV_RGBA32F, samplerBilinear, "Texture2D.Sample(bilinear) random"); dx.presentFrame(); diff --git a/perftest/perftest.vcxproj b/perftest/perftest.vcxproj index 008d384..99e1aa3 100644 --- a/perftest/perftest.vcxproj +++ b/perftest/perftest.vcxproj @@ -1,5 +1,5 @@  - + Debug @@ -78,8 +78,9 @@ true Compute - 5.0 + 6.0 
$(ProjectDir)\shaders\%(Filename).cso + /Fd $(ProjectDir)\shaders\ %(AdditionalOptions) @@ -87,15 +88,17 @@ Level3 Disabled true + stdcpp20 - DXGI.lib;d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;dxguid.lib;%(AdditionalDependencies) + DXGI.lib;d3d12.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) true Compute - 5.0 + 6.0 $(ProjectDir)\shaders\%(Filename).cso + /Fd $(ProjectDir)\shaders\ %(AdditionalOptions) @@ -113,8 +116,9 @@ true Compute - 5.0 + 6.0 $(ProjectDir)\shaders\%(Filename).cso + /Fd $(ProjectDir)\shaders\ %(AdditionalOptions) @@ -124,17 +128,19 @@ true true true + stdcpp20 true true - DXGI.lib;d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;dxguid.lib;%(AdditionalDependencies) + DXGI.lib;d3d12.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) true Compute - 5.0 + 6.0 $(ProjectDir)\shaders\%(Filename).cso + /Fd $(ProjectDir)\shaders\ %(AdditionalOptions) @@ -212,9 +218,17 @@ + + + + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
+ + + \ No newline at end of file diff --git a/perftest/perftest.vcxproj.filters b/perftest/perftest.vcxproj.filters index bfbc7cc..2634070 100644 --- a/perftest/perftest.vcxproj.filters +++ b/perftest/perftest.vcxproj.filters @@ -249,5 +249,6 @@ Shaders\texture_sample + \ No newline at end of file diff --git a/perftest/sampleTexBody.hlsli b/perftest/sampleTexBody.hlsli index d267ef0..74df4f5 100644 --- a/perftest/sampleTexBody.hlsli +++ b/perftest/sampleTexBody.hlsli @@ -8,10 +8,18 @@ cbuffer CB0 : register(b0) LoadConstants loadConstants; }; +#define ROOT_SIGNATURE \ + "DescriptorTable(" \ + "CBV(b0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE),"\ + "SRV(t0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE),"\ + "UAV(u0, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)),"\ + "DescriptorTable(Sampler(s0))" + #define THREAD_GROUP_DIM 16 groupshared float dummyLDS[THREAD_GROUP_DIM][THREAD_GROUP_DIM]; +[RootSignature(ROOT_SIGNATURE)] [numthreads(THREAD_GROUP_DIM, THREAD_GROUP_DIM, 1)] void main(uint3 tid : SV_DispatchThreadID, uint3 gid : SV_GroupThreadID) { From 8655de5eb757fa3df5b4776cf150c847848fd63d Mon Sep 17 00:00:00 2001 From: Ruslan Kutdusov Date: Mon, 21 Apr 2025 22:16:52 +0100 Subject: [PATCH 5/6] add new data to README --- README.md | 427 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 427 insertions(+) diff --git a/README.md b/README.md index 5f51a7a..46f1142 100644 --- a/README.md +++ b/README.md @@ -601,6 +601,148 @@ Texture2D.Load random: 12.804ms 0.985x ``` **AMD Navi** TODO. 
+### AMD Navi 2 (RX 6600) Dx12+Dxc +```markdown +Buffer.Load uniform: 5.486ms 0.183ms 0.000ms 2.596x +Buffer.Load linear: 5.884ms 0.196ms 0.007ms 2.421x +Buffer.Load random: 5.263ms 0.175ms 0.000ms 2.706x +Buffer.Load uniform: 7.447ms 0.248ms 0.004ms 1.912x +Buffer.Load linear: 7.660ms 0.255ms 0.004ms 1.859x +Buffer.Load random: 7.617ms 0.254ms 0.001ms 1.870x +Buffer.Load uniform: 13.659ms 0.455ms 0.001ms 1.043x +Buffer.Load linear: 14.304ms 0.477ms 0.006ms 0.996x +Buffer.Load random: 14.242ms 0.475ms 0.001ms 1.000x +Buffer.Load uniform: 6.374ms 0.212ms 0.063ms 2.234x +Buffer.Load linear: 5.305ms 0.177ms 0.007ms 2.685x +Buffer.Load random: 6.765ms 0.225ms 0.078ms 2.105x +Buffer.Load uniform: 8.138ms 0.271ms 0.066ms 1.750x +Buffer.Load linear: 7.581ms 0.253ms 0.004ms 1.879x +Buffer.Load random: 7.224ms 0.241ms 0.040ms 1.971x +Buffer.Load uniform: 14.570ms 0.486ms 0.056ms 0.977x +Buffer.Load linear: 14.621ms 0.487ms 0.045ms 0.974x +Buffer.Load random: 14.248ms 0.475ms 0.001ms 1.000x +Buffer.Load uniform: 5.125ms 0.171ms 0.000ms 2.779x +Buffer.Load linear: 5.872ms 0.196ms 0.003ms 2.425x +Buffer.Load random: 5.880ms 0.196ms 0.001ms 2.422x +Buffer.Load uniform: 7.474ms 0.249ms 0.001ms 1.906x +Buffer.Load linear: 6.962ms 0.232ms 0.000ms 2.046x +Buffer.Load random: 7.633ms 0.254ms 0.001ms 1.866x +Buffer.Load uniform: 14.517ms 0.484ms 0.042ms 0.981x +Buffer.Load linear: 14.454ms 0.482ms 0.002ms 0.985x +Buffer.Load random: 13.974ms 0.466ms 0.043ms 1.019x +ByteAddressBuffer.Load uniform: 8.129ms 0.271ms 0.001ms 1.752x +ByteAddressBuffer.Load linear: 6.275ms 0.209ms 0.001ms 2.270x +ByteAddressBuffer.Load random: 5.941ms 0.198ms 0.001ms 2.397x +ByteAddressBuffer.Load2 uniform: 7.758ms 0.259ms 0.001ms 1.836x +ByteAddressBuffer.Load2 linear: 8.005ms 0.267ms 0.001ms 1.779x +ByteAddressBuffer.Load2 random: 7.686ms 0.256ms 0.001ms 1.853x +ByteAddressBuffer.Load3 uniform: 8.614ms 0.287ms 0.050ms 1.653x +ByteAddressBuffer.Load3 linear: 17.324ms 0.577ms 0.005ms 0.822x 
+ByteAddressBuffer.Load3 random: 27.938ms 0.931ms 0.003ms 0.510x +ByteAddressBuffer.Load4 uniform: 8.838ms 0.295ms 0.040ms 1.611x +ByteAddressBuffer.Load4 linear: 14.384ms 0.479ms 0.003ms 0.990x +ByteAddressBuffer.Load4 random: 13.739ms 0.458ms 0.001ms 1.037x +ByteAddressBuffer.Load2 unaligned uniform: 8.841ms 0.295ms 0.005ms 1.611x +ByteAddressBuffer.Load2 unaligned linear: 14.623ms 0.487ms 0.038ms 0.974x +ByteAddressBuffer.Load2 unaligned random: 14.369ms 0.479ms 0.002ms 0.991x +ByteAddressBuffer.Load4 unaligned uniform: 8.303ms 0.277ms 0.052ms 1.715x +ByteAddressBuffer.Load4 unaligned linear: 21.036ms 0.701ms 0.002ms 0.677x +ByteAddressBuffer.Load4 unaligned random: 27.566ms 0.919ms 0.001ms 0.517x +StructuredBuffer.Load uniform: 10.128ms 0.338ms 0.039ms 1.406x +StructuredBuffer.Load linear: 5.907ms 0.197ms 0.002ms 2.411x +StructuredBuffer.Load random: 6.344ms 0.211ms 0.056ms 2.245x +StructuredBuffer.Load uniform: 9.890ms 0.330ms 0.002ms 1.440x +StructuredBuffer.Load linear: 7.637ms 0.255ms 0.001ms 1.865x +StructuredBuffer.Load random: 7.002ms 0.233ms 0.000ms 2.034x +StructuredBuffer.Load uniform: 10.471ms 0.349ms 0.053ms 1.360x +StructuredBuffer.Load linear: 14.308ms 0.477ms 0.001ms 0.995x +StructuredBuffer.Load random: 14.467ms 0.482ms 0.040ms 0.984x +cbuffer{float4} load uniform: 11.762ms 0.392ms 0.004ms 1.211x +cbuffer{float4} load linear: 15.887ms 0.530ms 0.001ms 0.896x +cbuffer{float4} load random: 13.946ms 0.465ms 0.048ms 1.021x +Texture2D.Load uniform: 7.705ms 0.257ms 0.002ms 1.848x +Texture2D.Load linear: 7.764ms 0.259ms 0.004ms 1.834x +Texture2D.Load random: 7.048ms 0.235ms 0.000ms 2.021x +Texture2D.Load uniform: 7.778ms 0.259ms 0.002ms 1.831x +Texture2D.Load linear: 8.584ms 0.286ms 0.045ms 1.659x +Texture2D.Load random: 8.243ms 0.275ms 0.001ms 1.728x +Texture2D.Load uniform: 14.296ms 0.477ms 0.002ms 0.996x +Texture2D.Load linear: 14.736ms 0.491ms 0.043ms 0.967x +Texture2D.Load random: 14.383ms 0.479ms 0.001ms 0.990x +Texture2D.Load uniform: 7.735ms 
0.258ms 0.001ms 1.841x +Texture2D.Load linear: 7.871ms 0.262ms 0.001ms 1.809x +Texture2D.Load random: 7.720ms 0.257ms 0.004ms 1.845x +Texture2D.Load uniform: 7.901ms 0.263ms 0.001ms 1.803x +Texture2D.Load linear: 9.486ms 0.316ms 0.001ms 1.501x +Texture2D.Load random: 7.742ms 0.258ms 0.001ms 1.840x +Texture2D.Load uniform: 14.601ms 0.487ms 0.002ms 0.975x +Texture2D.Load linear: 14.724ms 0.491ms 0.002ms 0.967x +Texture2D.Load random: 14.600ms 0.487ms 0.001ms 0.975x +Texture2D.Load uniform: 7.833ms 0.261ms 0.002ms 1.818x +Texture2D.Load linear: 9.398ms 0.313ms 0.004ms 1.516x +Texture2D.Load random: 7.854ms 0.262ms 0.001ms 1.813x +Texture2D.Load uniform: 7.909ms 0.264ms 0.004ms 1.801x +Texture2D.Load linear: 14.557ms 0.485ms 0.004ms 0.978x +Texture2D.Load random: 8.474ms 0.282ms 0.001ms 1.681x +Texture2D.Load uniform: 14.683ms 0.489ms 0.002ms 0.970x +Texture2D.Load linear: 14.772ms 0.492ms 0.002ms 0.964x +Texture2D.Load random: 14.088ms 0.470ms 0.001ms 1.011x +Texture2D.Sample(nearest) uniform: 28.607ms 0.954ms 0.003ms 0.498x +Texture2D.Sample(nearest) linear: 28.502ms 0.950ms 0.003ms 0.500x +Texture2D.Sample(nearest) random: 28.285ms 0.943ms 0.003ms 0.504x +Texture2D.Sample(nearest) uniform: 27.499ms 0.917ms 0.004ms 0.518x +Texture2D.Sample(nearest) linear: 28.022ms 0.934ms 0.002ms 0.508x +Texture2D.Sample(nearest) random: 28.157ms 0.939ms 0.044ms 0.506x +Texture2D.Sample(nearest) uniform: 27.847ms 0.928ms 0.002ms 0.511x +Texture2D.Sample(nearest) linear: 27.302ms 0.910ms 0.004ms 0.522x +Texture2D.Sample(nearest) random: 27.884ms 0.929ms 0.002ms 0.511x +Texture2D.Sample(nearest) uniform: 27.725ms 0.924ms 0.002ms 0.514x +Texture2D.Sample(nearest) linear: 27.746ms 0.925ms 0.001ms 0.513x +Texture2D.Sample(nearest) random: 27.107ms 0.904ms 0.001ms 0.525x +Texture2D.Sample(nearest) uniform: 27.703ms 0.923ms 0.001ms 0.514x +Texture2D.Sample(nearest) linear: 27.772ms 0.926ms 0.001ms 0.513x +Texture2D.Sample(nearest) random: 27.756ms 0.925ms 0.003ms 0.513x 
+Texture2D.Sample(nearest) uniform: 27.173ms 0.906ms 0.004ms 0.524x +Texture2D.Sample(nearest) linear: 27.843ms 0.928ms 0.001ms 0.512x +Texture2D.Sample(nearest) random: 27.823ms 0.927ms 0.001ms 0.512x +Texture2D.Sample(nearest) uniform: 27.672ms 0.922ms 0.001ms 0.515x +Texture2D.Sample(nearest) linear: 27.135ms 0.905ms 0.004ms 0.525x +Texture2D.Sample(nearest) random: 27.722ms 0.924ms 0.001ms 0.514x +Texture2D.Sample(nearest) uniform: 27.695ms 0.923ms 0.001ms 0.514x +Texture2D.Sample(nearest) linear: 27.794ms 0.926ms 0.001ms 0.512x +Texture2D.Sample(nearest) random: 27.159ms 0.905ms 0.001ms 0.524x +Texture2D.Sample(nearest) uniform: 27.767ms 0.926ms 0.001ms 0.513x +Texture2D.Sample(nearest) linear: 27.861ms 0.929ms 0.001ms 0.511x +Texture2D.Sample(nearest) random: 27.835ms 0.928ms 0.001ms 0.512x +Texture2D.Sample(bilinear) uniform: 27.067ms 0.902ms 0.003ms 0.526x +Texture2D.Sample(bilinear) linear: 27.724ms 0.924ms 0.001ms 0.514x +Texture2D.Sample(bilinear) random: 27.729ms 0.924ms 0.002ms 0.514x +Texture2D.Sample(bilinear) uniform: 27.700ms 0.923ms 0.001ms 0.514x +Texture2D.Sample(bilinear) linear: 27.180ms 0.906ms 0.004ms 0.524x +Texture2D.Sample(bilinear) random: 27.778ms 0.926ms 0.002ms 0.513x +Texture2D.Sample(bilinear) uniform: 27.750ms 0.925ms 0.002ms 0.513x +Texture2D.Sample(bilinear) linear: 27.830ms 0.928ms 0.001ms 0.512x +Texture2D.Sample(bilinear) random: 27.205ms 0.907ms 0.001ms 0.524x +Texture2D.Sample(bilinear) uniform: 27.649ms 0.922ms 0.004ms 0.515x +Texture2D.Sample(bilinear) linear: 27.724ms 0.924ms 0.001ms 0.514x +Texture2D.Sample(bilinear) random: 27.727ms 0.924ms 0.001ms 0.514x +Texture2D.Sample(bilinear) uniform: 27.081ms 0.903ms 0.001ms 0.526x +Texture2D.Sample(bilinear) linear: 27.765ms 0.925ms 0.004ms 0.513x +Texture2D.Sample(bilinear) random: 27.766ms 0.926ms 0.001ms 0.513x +Texture2D.Sample(bilinear) uniform: 27.767ms 0.926ms 0.001ms 0.513x +Texture2D.Sample(bilinear) linear: 27.221ms 0.907ms 0.001ms 0.523x +Texture2D.Sample(bilinear) 
random: 27.828ms 0.928ms 0.001ms 0.512x +Texture2D.Sample(bilinear) uniform: 27.651ms 0.922ms 0.004ms 0.515x +Texture2D.Sample(bilinear) linear: 27.708ms 0.924ms 0.004ms 0.514x +Texture2D.Sample(bilinear) random: 27.103ms 0.903ms 0.000ms 0.525x +Texture2D.Sample(bilinear) uniform: 27.703ms 0.923ms 0.001ms 0.514x +Texture2D.Sample(bilinear) linear: 27.780ms 0.926ms 0.002ms 0.513x +Texture2D.Sample(bilinear) random: 27.771ms 0.926ms 0.001ms 0.513x +Texture2D.Sample(bilinear) uniform: 54.000ms 1.800ms 0.001ms 0.264x +Texture2D.Sample(bilinear) linear: 54.737ms 1.825ms 0.005ms 0.260x +Texture2D.Sample(bilinear) random: 54.696ms 1.823ms 0.004ms 0.260x +``` + ### NVidia Maxwell (GTX 980 Ti) ```markdown Buffer.Load uniform: 1.249ms 28.812x @@ -1066,6 +1208,149 @@ Texture2D.Load random: 32.783ms 0.499x **Uniform address optimization:** Like Volta, the new uniform address optimization no longer affects StructuredBuffers. My educated guess is that StructuredBuffers (like raw buffers) now use the same lower latency direct memory path. Nvidia most likely hasn't yet implemented uniform address optimization for these new memory operations. Turing uniform address optimization performance however (in other cases) returns to similar 20x+ figures than Maxwell/Pascal. 
+ +### NVidia Turing (RTX 2060) Dx12+Dxc +```markdown +Buffer.Load uniform: 42.546ms 1.418ms 0.024ms 0.974x +Buffer.Load linear: 42.429ms 1.414ms 0.024ms 0.976x +Buffer.Load random: 42.231ms 1.408ms 0.024ms 0.981x +Buffer.Load uniform: 42.098ms 1.403ms 0.028ms 0.984x +Buffer.Load linear: 41.970ms 1.399ms 0.024ms 0.987x +Buffer.Load random: 41.749ms 1.392ms 0.026ms 0.992x +Buffer.Load uniform: 42.109ms 1.404ms 0.027ms 0.984x +Buffer.Load linear: 41.704ms 1.390ms 0.024ms 0.993x +Buffer.Load random: 41.431ms 1.381ms 0.023ms 1.000x +Buffer.Load uniform: 41.301ms 1.377ms 0.023ms 1.003x +Buffer.Load linear: 41.118ms 1.371ms 0.017ms 1.008x +Buffer.Load random: 41.085ms 1.369ms 0.018ms 1.008x +Buffer.Load uniform: 41.019ms 1.367ms 0.017ms 1.010x +Buffer.Load linear: 40.915ms 1.364ms 0.013ms 1.013x +Buffer.Load random: 40.896ms 1.363ms 0.013ms 1.013x +Buffer.Load uniform: 41.213ms 1.374ms 0.013ms 1.005x +Buffer.Load linear: 41.009ms 1.367ms 0.012ms 1.010x +Buffer.Load random: 40.921ms 1.364ms 0.011ms 1.012x +Buffer.Load uniform: 40.977ms 1.366ms 0.014ms 1.011x +Buffer.Load linear: 41.016ms 1.367ms 0.013ms 1.010x +Buffer.Load random: 41.044ms 1.368ms 0.016ms 1.009x +Buffer.Load uniform: 41.188ms 1.373ms 0.017ms 1.006x +Buffer.Load linear: 41.307ms 1.377ms 0.023ms 1.003x +Buffer.Load random: 41.455ms 1.382ms 0.022ms 0.999x +Buffer.Load uniform: 82.315ms 2.744ms 0.050ms 0.503x +Buffer.Load linear: 82.720ms 2.757ms 0.046ms 0.501x +Buffer.Load random: 83.249ms 2.775ms 0.042ms 0.498x +ByteAddressBuffer.Load uniform: 24.914ms 0.830ms 0.013ms 1.663x +ByteAddressBuffer.Load linear: 27.733ms 0.924ms 0.010ms 1.494x +ByteAddressBuffer.Load random: 25.408ms 0.847ms 0.008ms 1.631x +ByteAddressBuffer.Load2 uniform: 29.087ms 0.970ms 0.008ms 1.424x +ByteAddressBuffer.Load2 linear: 45.967ms 1.532ms 0.011ms 0.901x +ByteAddressBuffer.Load2 random: 29.715ms 0.990ms 0.008ms 1.394x +ByteAddressBuffer.Load3 uniform: 36.015ms 1.200ms 0.010ms 1.150x +ByteAddressBuffer.Load3 linear: 48.069ms 1.602ms 
0.010ms 0.862x +ByteAddressBuffer.Load3 random: 40.612ms 1.354ms 0.009ms 1.020x +ByteAddressBuffer.Load4 uniform: 42.966ms 1.432ms 0.012ms 0.964x +ByteAddressBuffer.Load4 linear: 114.037ms 3.801ms 0.050ms 0.363x +ByteAddressBuffer.Load4 random: 88.896ms 2.963ms 0.058ms 0.466x +ByteAddressBuffer.Load2 unaligned uniform: 29.944ms 0.998ms 0.024ms 1.384x +ByteAddressBuffer.Load2 unaligned linear: 47.366ms 1.579ms 0.044ms 0.875x +ByteAddressBuffer.Load2 unaligned random: 30.523ms 1.017ms 0.026ms 1.357x +ByteAddressBuffer.Load4 unaligned uniform: 44.288ms 1.476ms 0.042ms 0.935x +ByteAddressBuffer.Load4 unaligned linear: 117.805ms 3.927ms 0.101ms 0.352x +ByteAddressBuffer.Load4 unaligned random: 92.137ms 3.071ms 0.075ms 0.450x +StructuredBuffer.Load uniform: 21.173ms 0.706ms 0.018ms 1.957x +StructuredBuffer.Load linear: 24.887ms 0.830ms 0.015ms 1.665x +StructuredBuffer.Load random: 23.340ms 0.778ms 0.016ms 1.775x +StructuredBuffer.Load uniform: 23.389ms 0.780ms 0.015ms 1.771x +StructuredBuffer.Load linear: 26.705ms 0.890ms 0.018ms 1.551x +StructuredBuffer.Load random: 25.050ms 0.835ms 0.015ms 1.654x +StructuredBuffer.Load uniform: 45.781ms 1.526ms 0.026ms 0.905x +StructuredBuffer.Load linear: 59.790ms 1.993ms 0.034ms 0.693x +StructuredBuffer.Load random: 53.258ms 1.775ms 0.027ms 0.778x +cbuffer{float4} load uniform: 25.805ms 0.860ms 0.015ms 1.606x +cbuffer{float4} load linear: 1052.681ms 35.089ms 0.402ms 0.039x +cbuffer{float4} load random: 600.614ms 20.020ms 0.306ms 0.069x +Texture2D.Load uniform: 41.035ms 1.368ms 0.027ms 1.010x +Texture2D.Load linear: 40.913ms 1.364ms 0.024ms 1.013x +Texture2D.Load random: 40.795ms 1.360ms 0.013ms 1.016x +Texture2D.Load uniform: 40.818ms 1.361ms 0.012ms 1.015x +Texture2D.Load linear: 40.847ms 1.362ms 0.011ms 1.014x +Texture2D.Load random: 40.785ms 1.360ms 0.013ms 1.016x +Texture2D.Load uniform: 40.805ms 1.360ms 0.011ms 1.015x +Texture2D.Load linear: 41.333ms 1.378ms 0.009ms 1.002x +Texture2D.Load random: 41.057ms 1.369ms 0.009ms 1.009x 
+Texture2D.Load uniform: 40.666ms 1.356ms 0.009ms 1.019x +Texture2D.Load linear: 40.812ms 1.360ms 0.008ms 1.015x +Texture2D.Load random: 40.786ms 1.360ms 0.011ms 1.016x +Texture2D.Load uniform: 40.809ms 1.360ms 0.015ms 1.015x +Texture2D.Load linear: 41.111ms 1.370ms 0.012ms 1.008x +Texture2D.Load random: 41.019ms 1.367ms 0.021ms 1.010x +Texture2D.Load uniform: 41.404ms 1.380ms 0.024ms 1.001x +Texture2D.Load linear: 42.212ms 1.407ms 0.025ms 0.982x +Texture2D.Load random: 82.685ms 2.756ms 0.089ms 0.501x +Texture2D.Load uniform: 42.099ms 1.403ms 0.050ms 0.984x +Texture2D.Load linear: 42.544ms 1.418ms 0.052ms 0.974x +Texture2D.Load random: 42.721ms 1.424ms 0.057ms 0.970x +Texture2D.Load uniform: 43.266ms 1.442ms 0.056ms 0.958x +Texture2D.Load linear: 43.760ms 1.459ms 0.055ms 0.947x +Texture2D.Load random: 86.674ms 2.889ms 0.086ms 0.478x +Texture2D.Load uniform: 87.993ms 2.933ms 0.060ms 0.471x +Texture2D.Load linear: 89.194ms 2.973ms 0.025ms 0.465x +Texture2D.Load random: 89.533ms 2.984ms 0.022ms 0.463x +Texture2D.Sample(nearest) uniform: 45.503ms 1.517ms 0.014ms 0.911x +Texture2D.Sample(nearest) linear: 45.477ms 1.516ms 0.012ms 0.911x +Texture2D.Sample(nearest) random: 45.390ms 1.513ms 0.011ms 0.913x +Texture2D.Sample(nearest) uniform: 45.568ms 1.519ms 0.010ms 0.909x +Texture2D.Sample(nearest) linear: 45.669ms 1.522ms 0.011ms 0.907x +Texture2D.Sample(nearest) random: 45.633ms 1.521ms 0.011ms 0.908x +Texture2D.Sample(nearest) uniform: 47.668ms 1.589ms 0.012ms 0.869x +Texture2D.Sample(nearest) linear: 48.155ms 1.605ms 0.013ms 0.860x +Texture2D.Sample(nearest) random: 48.043ms 1.601ms 0.014ms 0.862x +Texture2D.Sample(nearest) uniform: 45.443ms 1.515ms 0.013ms 0.912x +Texture2D.Sample(nearest) linear: 45.389ms 1.513ms 0.012ms 0.913x +Texture2D.Sample(nearest) random: 45.382ms 1.513ms 0.010ms 0.913x +Texture2D.Sample(nearest) uniform: 45.530ms 1.518ms 0.010ms 0.910x +Texture2D.Sample(nearest) linear: 45.730ms 1.524ms 0.012ms 0.906x +Texture2D.Sample(nearest) random: 
45.593ms 1.520ms 0.012ms 0.909x +Texture2D.Sample(nearest) uniform: 47.595ms 1.587ms 0.015ms 0.870x +Texture2D.Sample(nearest) linear: 48.407ms 1.614ms 0.018ms 0.856x +Texture2D.Sample(nearest) random: 90.192ms 3.006ms 0.038ms 0.459x +Texture2D.Sample(nearest) uniform: 45.930ms 1.531ms 0.023ms 0.902x +Texture2D.Sample(nearest) linear: 46.137ms 1.538ms 0.025ms 0.898x +Texture2D.Sample(nearest) random: 46.184ms 1.539ms 0.022ms 0.897x +Texture2D.Sample(nearest) uniform: 46.440ms 1.548ms 0.024ms 0.892x +Texture2D.Sample(nearest) linear: 46.688ms 1.556ms 0.020ms 0.887x +Texture2D.Sample(nearest) random: 91.388ms 3.046ms 0.033ms 0.453x +Texture2D.Sample(nearest) uniform: 91.601ms 3.053ms 0.029ms 0.452x +Texture2D.Sample(nearest) linear: 91.665ms 3.055ms 0.029ms 0.452x +Texture2D.Sample(nearest) random: 91.513ms 3.050ms 0.028ms 0.453x +Texture2D.Sample(bilinear) uniform: 46.322ms 1.544ms 0.015ms 0.894x +Texture2D.Sample(bilinear) linear: 46.185ms 1.540ms 0.014ms 0.897x +Texture2D.Sample(bilinear) random: 46.078ms 1.536ms 0.015ms 0.899x +Texture2D.Sample(bilinear) uniform: 46.202ms 1.540ms 0.013ms 0.897x +Texture2D.Sample(bilinear) linear: 46.313ms 1.544ms 0.014ms 0.895x +Texture2D.Sample(bilinear) random: 46.112ms 1.537ms 0.016ms 0.898x +Texture2D.Sample(bilinear) uniform: 48.008ms 1.600ms 0.018ms 0.863x +Texture2D.Sample(bilinear) linear: 48.692ms 1.623ms 0.018ms 0.851x +Texture2D.Sample(bilinear) random: 48.411ms 1.614ms 0.017ms 0.856x +Texture2D.Sample(bilinear) uniform: 45.681ms 1.523ms 0.018ms 0.907x +Texture2D.Sample(bilinear) linear: 45.741ms 1.525ms 0.012ms 0.906x +Texture2D.Sample(bilinear) random: 45.624ms 1.521ms 0.012ms 0.908x +Texture2D.Sample(bilinear) uniform: 45.744ms 1.525ms 0.010ms 0.906x +Texture2D.Sample(bilinear) linear: 45.987ms 1.533ms 0.016ms 0.901x +Texture2D.Sample(bilinear) random: 45.889ms 1.530ms 0.015ms 0.903x +Texture2D.Sample(bilinear) uniform: 47.870ms 1.596ms 0.014ms 0.865x +Texture2D.Sample(bilinear) linear: 48.595ms 1.620ms 0.015ms 
0.853x +Texture2D.Sample(bilinear) random: 90.672ms 3.022ms 0.027ms 0.457x +Texture2D.Sample(bilinear) uniform: 46.160ms 1.539ms 0.017ms 0.898x +Texture2D.Sample(bilinear) linear: 46.187ms 1.540ms 0.017ms 0.897x +Texture2D.Sample(bilinear) random: 46.286ms 1.543ms 0.017ms 0.895x +Texture2D.Sample(bilinear) uniform: 46.592ms 1.553ms 0.014ms 0.889x +Texture2D.Sample(bilinear) linear: 46.834ms 1.561ms 0.014ms 0.885x +Texture2D.Sample(bilinear) random: 91.524ms 3.051ms 0.023ms 0.453x +Texture2D.Sample(bilinear) uniform: 182.679ms 6.089ms 0.039ms 0.227x +Texture2D.Sample(bilinear) linear: 182.764ms 6.092ms 0.037ms 0.227x +Texture2D.Sample(bilinear) random: 270.850ms 9.028ms 0.067ms 0.153x +``` + ### NVidia Ampere (RTX 3090) ``` Buffer.Load uniform: 0.691ms 15.067x @@ -1212,6 +1497,148 @@ Texture2D.Sample(bilinear) random: 74.230ms 0.140x **Sampler ratios (NEW!):** New tests for sampler ratios show that Ampere has half rate bilinear RG32F and quarter rate bilinear RGBA32F. Nearest filtering is full rate, except for RGBA32F which is half rate (similar to RGBA32F texture loads). In Turing and Ampere RGBA32/float4 buffer loads are full rate. 
+### NVidia Ampere (RTX 3060) Dx12+Dxc +```markdown +Buffer.Load uniform: 19.301ms 0.643ms 0.001ms 1.493x +Buffer.Load linear: 19.310ms 0.644ms 0.002ms 1.493x +Buffer.Load random: 22.015ms 0.734ms 0.001ms 1.309x +Buffer.Load uniform: 19.656ms 0.655ms 0.052ms 1.466x +Buffer.Load linear: 19.562ms 0.652ms 0.035ms 1.473x +Buffer.Load random: 19.599ms 0.653ms 0.037ms 1.470x +Buffer.Load uniform: 20.206ms 0.674ms 0.001ms 1.426x +Buffer.Load linear: 20.247ms 0.675ms 0.001ms 1.423x +Buffer.Load random: 28.820ms 0.961ms 0.036ms 1.000x +Buffer.Load uniform: 19.200ms 0.640ms 0.001ms 1.501x +Buffer.Load linear: 19.360ms 0.645ms 0.002ms 1.489x +Buffer.Load random: 19.289ms 0.643ms 0.001ms 1.494x +Buffer.Load uniform: 19.642ms 0.655ms 0.078ms 1.467x +Buffer.Load linear: 19.385ms 0.646ms 0.000ms 1.487x +Buffer.Load random: 28.574ms 0.952ms 0.003ms 1.009x +Buffer.Load uniform: 20.290ms 0.676ms 0.001ms 1.420x +Buffer.Load linear: 20.368ms 0.679ms 0.003ms 1.415x +Buffer.Load random: 20.234ms 0.674ms 0.001ms 1.424x +Buffer.Load uniform: 19.200ms 0.640ms 0.001ms 1.501x +Buffer.Load linear: 19.365ms 0.645ms 0.002ms 1.488x +Buffer.Load random: 28.627ms 0.954ms 0.002ms 1.007x +Buffer.Load uniform: 19.243ms 0.641ms 0.001ms 1.498x +Buffer.Load linear: 19.273ms 0.642ms 0.004ms 1.495x +Buffer.Load random: 19.234ms 0.641ms 0.002ms 1.498x +Buffer.Load uniform: 38.357ms 1.279ms 0.040ms 0.751x +Buffer.Load linear: 38.228ms 1.274ms 0.001ms 0.754x +Buffer.Load random: 38.096ms 1.270ms 0.001ms 0.757x +ByteAddressBuffer.Load uniform: 24.101ms 0.803ms 0.078ms 1.196x +ByteAddressBuffer.Load linear: 26.086ms 0.870ms 0.000ms 1.105x +ByteAddressBuffer.Load random: 24.122ms 0.804ms 0.052ms 1.195x +ByteAddressBuffer.Load2 uniform: 28.951ms 0.965ms 0.086ms 0.996x +ByteAddressBuffer.Load2 linear: 29.716ms 0.991ms 0.085ms 0.970x +ByteAddressBuffer.Load2 random: 27.587ms 0.920ms 0.102ms 1.045x +ByteAddressBuffer.Load3 uniform: 32.907ms 1.097ms 0.130ms 0.876x +ByteAddressBuffer.Load3 linear: 35.248ms 1.175ms 
0.141ms 0.818x +ByteAddressBuffer.Load3 random: 33.923ms 1.131ms 0.001ms 0.850x +ByteAddressBuffer.Load4 uniform: 38.487ms 1.283ms 0.062ms 0.749x +ByteAddressBuffer.Load4 linear: 49.946ms 1.665ms 0.113ms 0.577x +ByteAddressBuffer.Load4 random: 38.990ms 1.300ms 0.093ms 0.739x +ByteAddressBuffer.Load2 unaligned uniform: 29.002ms 0.967ms 0.069ms 0.994x +ByteAddressBuffer.Load2 unaligned linear: 29.620ms 0.987ms 0.088ms 0.973x +ByteAddressBuffer.Load2 unaligned random: 27.663ms 0.922ms 0.097ms 1.042x +ByteAddressBuffer.Load4 unaligned uniform: 39.116ms 1.304ms 0.101ms 0.737x +ByteAddressBuffer.Load4 unaligned linear: 48.755ms 1.625ms 0.081ms 0.591x +ByteAddressBuffer.Load4 unaligned random: 38.488ms 1.283ms 0.078ms 0.749x +StructuredBuffer.Load uniform: 17.312ms 0.577ms 0.033ms 1.665x +StructuredBuffer.Load linear: 22.315ms 0.744ms 0.001ms 1.292x +StructuredBuffer.Load random: 22.470ms 0.749ms 0.001ms 1.283x +StructuredBuffer.Load uniform: 18.260ms 0.609ms 0.001ms 1.578x +StructuredBuffer.Load linear: 22.315ms 0.744ms 0.001ms 1.292x +StructuredBuffer.Load random: 22.369ms 0.746ms 0.000ms 1.288x +StructuredBuffer.Load uniform: 20.777ms 0.693ms 0.001ms 1.387x +StructuredBuffer.Load linear: 22.985ms 0.766ms 0.047ms 1.254x +StructuredBuffer.Load random: 22.442ms 0.748ms 0.001ms 1.284x +cbuffer{float4} load uniform: 26.342ms 0.878ms 0.052ms 1.094x +cbuffer{float4} load linear: 884.881ms 29.496ms 0.233ms 0.033x +cbuffer{float4} load random: 568.427ms 18.948ms 0.177ms 0.051x +Texture2D.Load uniform: 20.487ms 0.683ms 0.001ms 1.407x +Texture2D.Load linear: 20.420ms 0.681ms 0.001ms 1.411x +Texture2D.Load random: 20.442ms 0.681ms 0.002ms 1.410x +Texture2D.Load uniform: 20.385ms 0.679ms 0.001ms 1.414x +Texture2D.Load linear: 20.358ms 0.679ms 0.002ms 1.416x +Texture2D.Load random: 20.539ms 0.685ms 0.001ms 1.403x +Texture2D.Load uniform: 21.413ms 0.714ms 0.000ms 1.346x +Texture2D.Load linear: 29.704ms 0.990ms 0.002ms 0.970x +Texture2D.Load random: 40.703ms 1.357ms 0.089ms 0.708x 
+Texture2D.Load uniform: 20.507ms 0.684ms 0.001ms 1.405x +Texture2D.Load linear: 20.424ms 0.681ms 0.001ms 1.411x +Texture2D.Load random: 20.470ms 0.682ms 0.002ms 1.408x +Texture2D.Load uniform: 20.440ms 0.681ms 0.000ms 1.410x +Texture2D.Load linear: 29.577ms 0.986ms 0.000ms 0.974x +Texture2D.Load random: 40.110ms 1.337ms 0.001ms 0.719x +Texture2D.Load uniform: 21.624ms 0.721ms 0.001ms 1.333x +Texture2D.Load linear: 54.502ms 1.817ms 0.005ms 0.529x +Texture2D.Load random: 80.639ms 2.688ms 0.009ms 0.357x +Texture2D.Load uniform: 20.586ms 0.686ms 0.001ms 1.400x +Texture2D.Load linear: 29.941ms 0.998ms 0.004ms 0.963x +Texture2D.Load random: 40.446ms 1.348ms 0.044ms 0.713x +Texture2D.Load uniform: 20.643ms 0.688ms 0.001ms 1.396x +Texture2D.Load linear: 54.445ms 1.815ms 0.002ms 0.529x +Texture2D.Load random: 80.666ms 2.689ms 0.005ms 0.357x +Texture2D.Load uniform: 40.736ms 1.358ms 0.000ms 0.707x +Texture2D.Load linear: 80.360ms 2.679ms 0.001ms 0.359x +Texture2D.Load random: 80.761ms 2.692ms 0.048ms 0.357x +Texture2D.Sample(nearest) uniform: 40.360ms 1.345ms 0.001ms 0.714x +Texture2D.Sample(nearest) linear: 40.364ms 1.345ms 0.000ms 0.714x +Texture2D.Sample(nearest) random: 40.397ms 1.347ms 0.002ms 0.713x +Texture2D.Sample(nearest) uniform: 40.372ms 1.346ms 0.001ms 0.714x +Texture2D.Sample(nearest) linear: 40.719ms 1.357ms 0.039ms 0.708x +Texture2D.Sample(nearest) random: 40.383ms 1.346ms 0.001ms 0.714x +Texture2D.Sample(nearest) uniform: 40.392ms 1.346ms 0.001ms 0.714x +Texture2D.Sample(nearest) linear: 40.717ms 1.357ms 0.036ms 0.708x +Texture2D.Sample(nearest) random: 40.848ms 1.362ms 0.061ms 0.706x +Texture2D.Sample(nearest) uniform: 40.351ms 1.345ms 0.001ms 0.714x +Texture2D.Sample(nearest) linear: 40.479ms 1.349ms 0.001ms 0.712x +Texture2D.Sample(nearest) random: 40.421ms 1.347ms 0.002ms 0.713x +Texture2D.Sample(nearest) uniform: 40.566ms 1.352ms 0.036ms 0.710x +Texture2D.Sample(nearest) linear: 41.461ms 1.382ms 0.101ms 0.695x +Texture2D.Sample(nearest) random: 
41.515ms 1.384ms 0.131ms 0.694x +Texture2D.Sample(nearest) uniform: 40.834ms 1.361ms 0.055ms 0.706x +Texture2D.Sample(nearest) linear: 40.519ms 1.351ms 0.002ms 0.711x +Texture2D.Sample(nearest) random: 80.786ms 2.693ms 0.085ms 0.357x +Texture2D.Sample(nearest) uniform: 40.357ms 1.345ms 0.001ms 0.714x +Texture2D.Sample(nearest) linear: 40.712ms 1.357ms 0.042ms 0.708x +Texture2D.Sample(nearest) random: 40.478ms 1.349ms 0.000ms 0.712x +Texture2D.Sample(nearest) uniform: 40.826ms 1.361ms 0.080ms 0.706x +Texture2D.Sample(nearest) linear: 40.496ms 1.350ms 0.002ms 0.712x +Texture2D.Sample(nearest) random: 80.505ms 2.683ms 0.009ms 0.358x +Texture2D.Sample(nearest) uniform: 80.725ms 2.691ms 0.004ms 0.357x +Texture2D.Sample(nearest) linear: 80.660ms 2.689ms 0.077ms 0.357x +Texture2D.Sample(nearest) random: 80.280ms 2.676ms 0.003ms 0.359x +Texture2D.Sample(bilinear) uniform: 40.363ms 1.345ms 0.001ms 0.714x +Texture2D.Sample(bilinear) linear: 40.565ms 1.352ms 0.035ms 0.710x +Texture2D.Sample(bilinear) random: 40.389ms 1.346ms 0.002ms 0.714x +Texture2D.Sample(bilinear) uniform: 40.369ms 1.346ms 0.001ms 0.714x +Texture2D.Sample(bilinear) linear: 40.491ms 1.350ms 0.000ms 0.712x +Texture2D.Sample(bilinear) random: 40.825ms 1.361ms 0.080ms 0.706x +Texture2D.Sample(bilinear) uniform: 40.745ms 1.358ms 0.060ms 0.707x +Texture2D.Sample(bilinear) linear: 40.716ms 1.357ms 0.037ms 0.708x +Texture2D.Sample(bilinear) random: 40.715ms 1.357ms 0.036ms 0.708x +Texture2D.Sample(bilinear) uniform: 40.375ms 1.346ms 0.001ms 0.714x +Texture2D.Sample(bilinear) linear: 40.483ms 1.349ms 0.001ms 0.712x +Texture2D.Sample(bilinear) random: 40.405ms 1.347ms 0.002ms 0.713x +Texture2D.Sample(bilinear) uniform: 40.382ms 1.346ms 0.002ms 0.714x +Texture2D.Sample(bilinear) linear: 40.492ms 1.350ms 0.001ms 0.712x +Texture2D.Sample(bilinear) random: 41.360ms 1.379ms 0.111ms 0.697x +Texture2D.Sample(bilinear) uniform: 41.048ms 1.368ms 0.115ms 0.702x +Texture2D.Sample(bilinear) linear: 40.525ms 1.351ms 0.002ms 
0.711x +Texture2D.Sample(bilinear) random: 80.319ms 2.677ms 0.002ms 0.359x +Texture2D.Sample(bilinear) uniform: 40.381ms 1.346ms 0.002ms 0.714x +Texture2D.Sample(bilinear) linear: 40.492ms 1.350ms 0.001ms 0.712x +Texture2D.Sample(bilinear) random: 40.483ms 1.349ms 0.001ms 0.712x +Texture2D.Sample(bilinear) uniform: 40.393ms 1.346ms 0.003ms 0.714x +Texture2D.Sample(bilinear) linear: 40.960ms 1.365ms 0.084ms 0.704x +Texture2D.Sample(bilinear) random: 80.945ms 2.698ms 0.058ms 0.356x +Texture2D.Sample(bilinear) uniform: 161.137ms 5.371ms 0.130ms 0.179x +Texture2D.Sample(bilinear) linear: 160.190ms 5.340ms 0.001ms 0.180x +Texture2D.Sample(bilinear) random: 245.120ms 8.171ms 0.001ms 0.118x +``` + ### Intel Gen9 (HD 630 / i7 6700K) ```markdown Buffer.Load uniform: 48.527ms 5.955x From db7eddcf4c5f9715ac03a5f80be12dbcb5681ba3 Mon Sep 17 00:00:00 2001 From: Ruslan Kutdusov Date: Mon, 21 Apr 2025 22:23:16 +0100 Subject: [PATCH 6/6] add packages.config --- perftest/packages.config | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 perftest/packages.config diff --git a/perftest/packages.config b/perftest/packages.config new file mode 100644 index 0000000..e81fa55 --- /dev/null +++ b/perftest/packages.config @@ -0,0 +1,4 @@ + + + + \ No newline at end of file