I've followed the tutorial at www.vulkan-tutorial.com and I'm trying to split the Uniform buffer into 2 seperate buffers, one for View and Projection and one for Model. I've found however once I add another buffer to the layout, even if my shaders don't use it's content, no geometry is rendered. I don't get anything from the validation layers.
I've found that if the two UBOs are the same buffer, I have no problem. But if I assign them to different buffers, nothing appears on the screen. Have added descriptor set generation code.
Here's my layout generation code. All values are submitted correctly, bindings are 0, 1 and 2 respectively and this is reflected in shader code. I'm currently not even using the data in the buffer in the shader - so it's got nothing to do with the data I'm actually putting in the buffer.
Edit: Have opened up in RenderDoc. Without the extra buffer, I can see the normal VP buffer and it's values. They look fine. If I add in the extra buffer, it does not show up, but also the data from the first buffer is all zeroes.
Descriptor Set Layout generation:
std::vector<VkDescriptorSetLayoutBinding> layoutBindings;
/*
newShader->features includes 3 "features", with bindings 0,1,2.
They are - uniform buffer, uniform buffer, sampler
vertex bit, vertex bit, fragment bit
*/
for (auto a : newShader->features)
{
VkDescriptorSetLayoutBinding newBinding = {};
newBinding.descriptorType = (VkDescriptorType)layoutBindingDescriptorType(a.featureType);
newBinding.binding = a.binding;
newBinding.stageFlags = (VkShaderStageFlags)layoutBindingStageFlag(a.stage);
newBinding.descriptorCount = 1;
newBinding.pImmutableSamplers = nullptr;
layoutBindings.push_back(newBinding);
}
VkDescriptorSetLayoutCreateInfo layoutCreateInfo = {};
layoutCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
layoutCreateInfo.bindingCount = static_cast<uint32_t>(layoutBindings.size());
layoutCreateInfo.pBindings = layoutBindings.data();
Descriptor Set Generation:
//Create a list of layouts
std::vector<VkDescriptorSetLayout> layouts(swapChainImages.size(), voa->shaderPipeline->shaderSetLayout);
//Allocate room for the descriptors
VkDescriptorSetAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
allocInfo.descriptorPool = voa->shaderPipeline->descriptorPool;
allocInfo.descriptorSetCount = static_cast<uint32_t>(swapChainImages.size());
allocInfo.pSetLayouts = layouts.data();
voa->descriptorSets.resize(swapChainImages.size());
if (vkAllocateDescriptorSets(vdi->device, &allocInfo, voa->descriptorSets.data()) != VK_SUCCESS) {
throw std::runtime_error("failed to allocate descriptor sets!");
}
//For each set of commandBuffers (frames in flight +1)
for (size_t i = 0; i < swapChainImages.size(); i++) {
std::vector<VkWriteDescriptorSet> descriptorWrites;
//Buffer Info construction
for (auto a : voa->renderComponent->getMaterial()->shader->features)
{
//Create a new descriptor write
uint32_t index = descriptorWrites.size();
descriptorWrites.push_back({});
descriptorWrites[index].dstBinding = a.binding;
if (a.featureType == HE2_SHADER_FEATURE_TYPE_UNIFORM_BLOCK)
{
VkDescriptorBufferInfo bufferInfo = {};
if (a.bufferSource == HE2_SHADER_BUFFER_SOURCE_VIEW_PROJECTION_BUFFER)
{
bufferInfo.buffer = viewProjectionBuffers[i];
bufferInfo.offset = 0;
bufferInfo.range = sizeof(ViewProjectionBuffer);
}
else if (a.bufferSource == HE2_SHADER_BUFFER_SOURCE_MODEL_BUFFER)
{
bufferInfo.buffer = modelBuffers[i];
bufferInfo.offset = voa->ID * sizeof(ModelBuffer);
bufferInfo.range = sizeof(ModelBuffer);
}
//The following is the same for all Uniform buffers
descriptorWrites[index].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
descriptorWrites[index].dstSet = voa->descriptorSets[i];
descriptorWrites[index].dstArrayElement = 0;
descriptorWrites[index].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
descriptorWrites[index].descriptorCount = 1;
descriptorWrites[index].pBufferInfo = &bufferInfo;
}
else if (a.featureType == HE2_SHADER_FEATURE_TYPE_SAMPLER2D)
{
VulkanImageReference ref = VulkanTextures::images[a.imageHandle];
VkDescriptorImageInfo imageInfo = {};
imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
imageInfo.imageView = ref.imageView;
imageInfo.sampler = defaultSampler;
descriptorWrites[index].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
descriptorWrites[index].dstSet = voa->descriptorSets[i];
descriptorWrites[index].dstArrayElement = 0;
descriptorWrites[index].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
descriptorWrites[index].descriptorCount = 1;
descriptorWrites[index].pImageInfo = &imageInfo;
}
else
{
throw std::runtime_error("Unsupported feature type present in shader");
}
}
vkUpdateDescriptorSets(vdi->device, static_cast<uint32_t>(descriptorWrites.size()), descriptorWrites.data(), 0, nullptr);
}
Edit: Here is descriptor set binding code
vkCmdBeginRenderPass(commandBuffers[i], &renderPassInfo, VK_SUBPASS_CONTENTS_INLINE);
//Very temporary Render loop. Binds every frame, very clumsy
for (int j = 0; j < max; j++)
{
VulkanObjectAttachment* voa = objectAttachments[j];
VulkanModelAttachment* vma = voa->renderComponent->getModel()->getComponent<VulkanModelAttachment>();
if (vma->indices == 0) continue;
vkCmdBindPipeline(commandBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, voa->shaderPipeline->pipeline);
VkBuffer vertexBuffers[] = { vma->vertexBuffer };
VkDeviceSize offsets[] = { 0 };
vkCmdBindVertexBuffers(commandBuffers[i], 0, 1, vertexBuffers, offsets);
vkCmdBindIndexBuffer(commandBuffers[i], vma->indexBuffer, 0, VK_INDEX_TYPE_UINT32);
vkCmdBindDescriptorSets(commandBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, voa->shaderPipeline->pipelineLayout, 0, 1, &voa->descriptorSets[i], 0, nullptr);
vkCmdDrawIndexed(commandBuffers[i], static_cast<uint32_t>(vma->indices), 1, 0, 0, 0);
}
vkCmdEndRenderPass(commandBuffers[i]);
Buffer updating code:
ViewProjectionBuffer ubo = {};
ubo.view = HE2_Camera::main->getCameraMatrix();
ubo.proj = HE2_Camera::main->getProjectionMatrix();
ubo.proj[1][1] *= -1;
ubo.model = a->object->getModelMatrix();
void* data;
vmaMapMemory(allocator, a->mvpAllocations[i], &data);
memcpy(data, &ubo, sizeof(ubo));
vmaUnmapMemory(allocator, a->mvpAllocations[i]);
}
std::vector<ModelBuffer> modelBuffersData;
for (VulkanObjectAttachment* voa : objectAttachments)
{
ModelBuffer mb = {};
mb.model = voa->object->getModelMatrix();
modelBuffersData.push_back(mb);
void* data;
vmaMapMemory(allocator, modelBuffersAllocation[i], &data);
memcpy(data, &modelBuffersData, sizeof(ModelBuffer) * modelBuffersData.size());
vmaUnmapMemory(allocator, modelBuffersAllocation[i]);
I found the problem - not a Vulkan issue but a C++ syntax one sadly. I'll explain it anyway but likely to not be your issue if you're visiting this page in the future.
I generate my descriptor writes in a loop. They're stored in a vector and then updated at the end of the loop
std::vector<VkDescriptorWrite> descriptorWrites;
for(int i = 0; i < shader.features.size); i++)
{
//Various stuff to the descriptor write
}
vkUpdateDescriptorSets(vdi->device, static_cast<uint32_t>(descriptorWrites.size()), descriptorWrites.data(), 0, nullptr);
One parameter of the descriptor write is pImageInfo or pBufferInfo. These point to a struct that contains specific data for that buffer or image. I filled these in within the loop
{//Within the loop above
//...
VkDescriptorBufferInfo bufferInfo = {};
bufferInfo.buffer = myBuffer;
descriptorWrites[i].pBufferInfo = &bufferInfo;
//...
}
Because these are passed by reference, not value, the descriptorWrite when being updated refers to the data in the original struct. But because the original struct was made in a loop, and the vkUpdateDescriptors line is outside of the loop, by the time that struct is read it's out of scope and deleted.
While this should result in undefined behaviour, I can only imagine because there's no new variables between the end of the loop and the update call, the memory still read the contents of the last descriptorWrite in the loop. So all descriptors read that memory, and had the resources from the last descriptorWrite pushed to them. Fixed it all just by putting the VkDescriptorBufferInfos in a vector of their own at the start of the loop.
It looks to me like the offset you're setting here is causing the VkWriteDescriptorSet to read overflow memory:
else if (a.bufferSource == HE2_SHADER_BUFFER_SOURCE_MODEL_BUFFER)
{
bufferInfo.buffer = modelBuffers[i];
bufferInfo.offset = voa->ID * sizeof(ModelBuffer);
bufferInfo.range = sizeof(ModelBuffer);
}
If you were only updating part of a buffer every frame, you'd do something like this:
bufferInfo.buffer = mvpBuffer[i];
bufferInfo.offset = sizeof(mat4[]{viewMat, projMat});
bufferInfo.range = sizeof(modelMat);
If you place the model in another buffer, you probably want to create a different binding for your descriptor set and your bufferInfo for your model data would look like this:
bufferInfo.buffer = modelBuffer[i];
bufferInfo.offset = 0;
bufferInfo.range = sizeof(modelMat);
Related
I would like to have information on the number of vertices that have been increased by doing Tessellation.
To do this, we send the vertex information from the Domain Shader to the Pixel Shader and use the RWStructureBuffer in the Pixel Shader as shown below.
struct Data
{
float3 position;
};
RWStructuredBuffer<Data> rwBuffer0 : register(u1);
・・・
Data data;
data.position = input.position;
rwBuffer0[id] = data;
・・・
}
On the CPU side, we are trying to receive the following.
struct ReternUAV
{
DirectX::XMFLOA3 position;
};
HRESULT hr = S_OK;
Microsoft::WRL::ComPtr<ID3D11Buffer> outputBuffer;
D3D11_BUFFER_DESC outputDesc;
outputDesc.Usage = D3D11_USAGE_DEFAULT;
outputDesc.ByteWidth = sizeof(ReternUAV) * 10000;
outputDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
outputDesc.CPUAccessFlags = 0;
outputDesc.StructureByteStride = sizeof(ReternUAV);
outputDesc.MiscFlags = 0;
device->CreateBuffer(&outputDesc, nullptr, outputBuffer.GetAddressOf());
Microsoft::WRL::ComPtr<ID3D11Buffer> outputResultBuffer;
outputDesc.Usage = D3D11_USAGE_STAGING;
outputDesc.BindFlags = 0;
outputDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
device->CreateBuffer(&outputDesc, nullptr, outputResultBuffer.GetAddressOf());
D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc;
uavDesc.Buffer.FirstElement = 0;
uavDesc.Buffer.Flags = 0;
uavDesc.Buffer.NumElements = 10000;
uavDesc.Format = DXGI_FORMAT_R32G32B32_FLOAT;
uavDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
Microsoft::WRL::ComPtr<ID3D11UnorderedAccessVie>unorderedAccessView;
hr = device->CreateUnorderedAccessView(outputBuffer.Get(), &uavDesc, unorderedAccessView.GetAddressOf());
if (FAILED(hr))
{
assert(!"CreateUnorderedAccessView"); // <ーFailed to create
}
ID3D11RenderTargetView* renderTarget = GameScene::GetRenderTargetView();
ID3D11DepthStencilView* deStencilView = GameScene::GetDepthStencilView();
context>OMSetRenderTargetsAndUnorderedAccessViews(1, &renderTarget, deStencilView,1, 1, unorderedAccessView.GetAddressOf(),NULL);
context->DrawIndexed(subset.indexCount, subset.indexStart, 0);
Microsoft::WRL::ComPtr<ID3D11UnorderedAccessView> unCom = nullptr;
context->OMSetRenderTargetsAndUnorderedAccessViews(1, &renderTarget, deStencilView,1, 1, unCom.GetAddressOf(),NULL);
context->CopyResource(outputResultBuffer.Get(), outputBuffer.Get());
D3D11_MAPPED_SUBRESOURCE mappedBuffer;
D3D11_MAP map = D3D11_MAP_READ;
hr = context->Map(outputResultBuffer.Get(), 0, map, 0, &mappedBuffer);
ReternUAV* copy = reinterpret_cast<ReternUAV*>(mappedBuffer.pData);
UINT num = sizeof(copy);
for (int i = 0; i < num; i++)
{
ReternUAV a = copy[i];
a = a;
}
context->Unmap(outputResultBuffer.Get(), 0);
It may be that the CreateUnorderedAccessView is failing to create it, but I couldn't figure out what was causing it.
If I ignore this and run,
The data in "copy" that I mapped and read is all 0,0,0 and there are only 8 elements.
I would like to ask you where I am going wrong.
If there is a better way to achieve the goal, I would like to hear about it.
Eventually, I would like to tessellation and handle the newly obtained data with the CPU.
Thank you very much for your help.
uavDesc.Format must be DXGI_FORMAT_UNKNOWN when creating a View of a Structured Buffer. Also "UINT num = sizeof(copy);" will not return the number of written vertices. :)
I recommend to create a device using D3D11_CREATE_DEVICE_DEBUG flag and then you will get an explanation why it failed to create the UAV. Just pass the flag to the D3D11CreateDevice().
The best way is to use D3D11_QUERY if you need only the number of vertices.
https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_query
https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ns-d3d11-d3d11_query_data_pipeline_statistics
D3D11_QUERY_DESC qdesc = {D3D11_QUERY_PIPELINE_STATISTICS};
ID3D11Query* query = 0;
device->CreateQuery(&qdesc, &query);
context->Begin(query);
context->DrawIndexed(index_count, 0, 0);
context->End(query);
D3D11_QUERY_DATA_PIPELINE_STATISTICS stats = {};
while (S_FALSE == context->GetData(query, &stats, sizeof(stats), 0))
;
query->Release();
I'm trying to make frustrum culling via compute shader. For that I have a pair of buffers for instanced vertex attributes, and a pair of buffers for indirect draw commands. My compute shader checks if instance coordinates from first buffer are within bounding volume, referencing first draw buffer for counts, subgroupBallot and bitCount to see offset within subgroup, then add results from other subgroups and a global offset, and finally stores the result in second buffer. The global offset is stored in second indirect draw buffer.
The problem is that, when under load, frustum may be few(>1) frames late to the moving camera, with wide lines of disappeared objects on edge. It seems weird to me because culling and rendering are done within same command buffer.
When taking capture in renderdoc, taking a screenshot alt+printScreen, or pausing the render-present thread, things snap back to as they should be.
My only guess is that compute shader from past frame continues to execute even when new frame starts to be drawn, though this should not be happening due to pipeline barriers.
Shader code:
#version 460
#extension GL_KHR_shader_subgroup_ballot : require
struct drawData{
uint indexCount;
uint instanceCount;
uint firstIndex;
uint vertexOffset;
uint firstInstance;
};
struct instanceData{
float x, y, z;
float a, b, c, d;
};
layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;
layout(set = 0, binding = 0) uniform A
{
mat4 cam;
vec4 camPos;
vec4 l;
vec4 t;
vec4 r;
vec4 b;
};
layout(set = 0, binding = 1) buffer B
{
uint count;
drawData data[];
} Draw[2];
layout(set = 0, binding = 2) buffer C
{
instanceData data[];
} Instance[2];
shared uint offsetsM[32];
void main()
{
const uint gID = gl_LocalInvocationID.x;
const uint lID = gl_SubgroupInvocationID;
const uint patchSize = gl_WorkGroupSize.x;
Draw[1].data[0] = Draw[0].data[0];//copy data like index count
Draw[1].count = Draw[0].count;
uint offsetG = 0;//accumulating offset within end buffer
uint loops = Draw[0].data[0].instanceCount/patchSize;//constant loop count
for(uint i = 0; i<loops;++i){
uint posa = i*patchSize+gID;//runs better this way for some reason
vec3 pos = camPos.xyz-vec3(Instance[0].data[posa].x, Instance[0].data[posa].y, Instance[0].data[posa].z);//position relative to camera
mat4x3 lrtb = mat4x3(l.xyz, r.xyz, t.xyz, b.xyz);
vec4 dist = pos*lrtb+Model.data[0].rad;//dot products and radius tolerance
bool Pass = posa<Draw[0].data[0].instanceCount&&//is real
(dot(pos, pos)<l.w*l.w) &&//not too far
all(greaterThan(dist, vec4(0))); //within view frustum
subgroupBarrier();//no idea what is the best, put what works
uvec4 actives = subgroupBallot(Pass);//count passed instances
if(subgroupElect())
offsetsM[gl_SubgroupID] = bitCount(actives).x+bitCount(actives).y;
barrier();
uint offsetL = bitCount(actives&gl_SubgroupLtMask).x+bitCount(actives&gl_SubgroupLtMask).y;//offset withing subgroup
uint ii = 0;
if(Pass){
for(; ii<gl_SubgroupID; ++ii)
offsetG+= offsetsM[ii];//offsets before subgroup
Instance[1].data[offsetG+offsetL] = Instance[0].data[posa];
for(; ii<gl_NumSubgroups; ++ii)
offsetG+= offsetsM[ii];}//offsets after subgroup
else for(; ii<gl_NumSubgroups; ++ii)
offsetG+= offsetsM[ii];//same but no data copying
}
if(gID == 0)
Draw[1].data[0].instanceCount = offsetG;
}
For renderpass after the compute I have dependencies:
{//1
deps[1].srcSubpass = VK_SUBPASS_EXTERNAL;
deps[1].dstSubpass = 0;
deps[1].srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
deps[1].dstStageMask = VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
deps[1].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
deps[1].dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
deps[1].dependencyFlags = 0;
}
{//2
deps[2].srcSubpass = VK_SUBPASS_EXTERNAL;
deps[2].dstSubpass = 0;
deps[2].srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
deps[2].dstStageMask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
deps[2].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
deps[2].dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
deps[2].dependencyFlags = 0;
}
The command buffer is(fully reused as is, one for each image in swapchain):
vkBeginCommandBuffer(cmd, &begInfo);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layoutsPipe[1],
0, 1, &descs[1], 0, 0);
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipes[1]);
vkCmdDispatch(cmd, 1, 1, 1);
VkBufferMemoryBarrier bufMemBar[2];
{//mem bars
{//0 indirect
bufMemBar[0].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
bufMemBar[0].dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
bufMemBar[0].buffer = bufferIndirect;
bufMemBar[0].offset = 0;
bufMemBar[0].size = -1;
}
{//1 vertex instance
bufMemBar[1].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
bufMemBar[1].dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
bufMemBar[1].buffer = bufferInstance;
bufMemBar[1].offset = 0;
bufMemBar[1].size = -1;
}
}
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, 0, 0, 0, 1, &bufMemBar[0], 0, 0);
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT , 0, 0, 0, 1, &bufMemBar[1], 0, 0);
VkRenderPassBeginInfo passBegInfo;
passBegInfo.renderPass = pass;
passBegInfo.framebuffer = chain.frames[i];
passBegInfo.renderArea = {{0, 0}, chain.dim};
VkClearValue clears[2]{{0},{0}};
passBegInfo.clearValueCount = 2;
passBegInfo.pClearValues = clears;
vkCmdBeginRenderPass(cmd, &passBegInfo, VK_SUBPASS_CONTENTS_INLINE);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, layoutsPipe[0], 0, 1, &descs[0], 0, 0);
vkCmdBindPipeline (cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipes[0]);
VkBuffer buffersVertex[2]{bufferVertexProto, bufferInstance};
VkDeviceSize offsetsVertex[2]{0, 0};
vkCmdBindVertexBuffers(cmd, 0, 2, buffersVertex, offsetsVertex);
vkCmdBindIndexBuffer (cmd, bufferIndex, 0, VK_INDEX_TYPE_UINT32);
vkCmdDrawIndexedIndirectCount(cmd, bufferIndirect, 0+4,
bufferIndirect, 0,
count.maxDraws, sizeof(VkDrawIndexedIndirectCommand));
vkCmdEndRenderPass(cmd);
vkEndCommandBuffer(cmd);
Rendering and presentation are synchronised with two semaphores - imageAvailable, and renderFinished. Frustum calculation is in right order on CPU. Validation layers are enabled.
The problem was that I lacked host synchronisation. Indeed, even within same command buffer, there are no host synchronisation guarantees (and that makes sense, since it enables us to use events).
Is it valid for a KTX image to be a cubemaps arrays, or is that not a thing?
I have some code that I'm currently using for uploading the data from a KTX file to the GPU. Currently, the code works for a regular 2d image, a cubemap, and a texture array. However, it would not support a KTX image that is a cubemap array, if that is a thing.
If it is possible, what is the code below missing to accomplish that?
uint32_t offset = 0;
for (uint32_t layer = 0; layer < layers; layer++) {
for (uint32_t face = 0; face < faces; face++) {
for (uint32_t level = 0; level < mipLevels; level++) {
offset = tex->GetImageOffset(layer, face, level);
vk::BufferImageCopy bufferCopyRegion = {};
bufferCopyRegion.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor;
bufferCopyRegion.imageSubresource.mipLevel = level;
bufferCopyRegion.imageSubresource.baseArrayLayer = (faces == 6 ? face : layer); // TexArray or Cubemap, not both.
bufferCopyRegion.imageSubresource.layerCount = 1;
bufferCopyRegion.imageExtent.width = width >> level;;
bufferCopyRegion.imageExtent.height = height >> level;
bufferCopyRegion.imageExtent.depth = 1;
bufferCopyRegion.bufferOffset = offset;
bufferCopyRegions.push_back(bufferCopyRegion);
}
}
}
Vulkan command to transfer the image.
// std::vector<vk::BufferImageCopy> regions;
cmdBuf->copyBufferToImage(srcBufferHandle, destImageHandle,
vk::ImageLayout::eTransferDstOptimal, uint32_t(regions.size()), regions.data());
Yes, KTX also supports cube map arrays (see the KTX specification). Those are stored using layers.
The Vulkan spec states the following on how cube maps are stored in a cube map array:
For cube arrays, each set of six sequential
layers is a single cube, so the number of cube maps in a cube map array view is layerCount / 6, and
image array layer (baseArrayLayer + i) is face index (i mod 6) of cube i / 6.
So you need to change the baseArrayLayer of your buffer copy region accordingly.
Sample code:
// Setup buffer copy regions to get the data from the ktx file to your own image
for (uint32_t layer = 0; layer < ktxTexture->numLayers; layer++) {
for (uint32_t face = 0; face < 6; face++) {
for (uint32_t level = 0; level < ktxTexture->numLevels; level++) {
ktx_size_t offset;
KTX_error_code ret = ktxTexture_GetImageOffset(ktxTexture, level, layer, face, &offset);
VkBufferImageCopy bufferCopyRegion = {};
bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
bufferCopyRegion.imageSubresource.mipLevel = level;
bufferCopyRegion.imageSubresource.baseArrayLayer = layer * 6 + face;
bufferCopyRegion.imageSubresource.layerCount = 1;
bufferCopyRegion.imageExtent.width = ktxTexture->baseWidth >> level;
bufferCopyRegion.imageExtent.height = ktxTexture->baseHeight >> level;
bufferCopyRegion.imageExtent.depth = 1;
bufferCopyRegion.bufferOffset = offset;
bufferCopyRegions.push_back(bufferCopyRegion);
}
}
}
// Create the image view for a cube map array
VkImageViewCreateInfo view = vks::initializers::imageViewCreateInfo();
view.viewType = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
view.format = format;
view.components = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A };
view.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
view.subresourceRange.layerCount = 6 * cubeMap.layerCount;
view.subresourceRange.levelCount = cubeMap.mipLevels;
view.image = cubeMap.image;
vkCreateImageView(device, &view, nullptr, &cubeMap.view);
First of all, my understanding of a descriptor range is that I can specify multiple buffers (constant buffers in my case) that a shader may use, is that correct? If not, then this is where my misunderstanding is, and the rest of the question will make no sense.
Lets say I want to pass a couple of constant buffers in my vertex shader
// Vertex.hlsl
float value0 : register(b0)
float value1 : register(b1)
...
And for whatever reason I want to use a descriptor range to specify b0 and b1. I fill out a D3D12_DESCRIPTOR_RANGE:
D3D12_DESCRIPTOR_RANGE range;
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
range.NumDescriptors = 2;
range.BaseShaderRegister = 0;
range.RegisterSpace = 0;
range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
I then go on to shove this into a root parameter
D3D12_ROOT_PARAMETER param;
param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
param.DescriptorTable.NumDescriptorRanges = 1;
param.DescriptorTable.pDescriptorRanges = ⦥
param.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
Root parameter goes into my signature description
D3D12_ROOT_SIGNATURE_DESC1 signatureDesc;
signatureDesc.NumParameters = 1;
signatureDesc.pParameters = ¶m;
signatureDesc.NumStaticSamplers = 0;
signatureDesc.pStaticSamplers = nullptr;
D3D12_ROOT_SIGNATURE_FLAGS = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
After this is create my root signature and so on. I also created a heap for 2 descriptors
D3D12_DESCRIPTOR_HEAP_DESC heapDescCbv;
heapDescCbv.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
heapDescCbv.NumDescriptors = 2;
heapDescCbv.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
heapDescCbv.NodeMask = 0;
ThrowIfFailed(m_device->CreateDescriptorHeap(&heapDescCbv, IID_PPV_ARGS(&m_cbvHeap)));
I then mapped the respective ID3D12Resource's to get two pointers so I can memcpy my values to them.
void D3D12App::AllocateConstantBuffer(SIZE_T index, size_t dataSize, ID3D12Resource** buffer, void** mappedPtr)
{
D3D12_HEAP_PROPERTIES heapProp;
heapProp.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
heapProp.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
heapProp.CreationNodeMask = 1;
heapProp.VisibleNodeMask = 1;
heapProp.Type = D3D12_HEAP_TYPE_UPLOAD;
D3D12_RESOURCE_DESC resDesc;
resDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
resDesc.Alignment = 0;
resDesc.Width = (dataSize + 255) & ~255;
resDesc.Height = 1;
resDesc.DepthOrArraySize = 1;
resDesc.MipLevels = 1;
resDesc.Format = DXGI_FORMAT_UNKNOWN;
resDesc.SampleDesc.Count = 1;
resDesc.SampleDesc.Quality = 0;
resDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
resDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
ThrowIfFailed(m_device->CreateCommittedResource(&heapProp, D3D12_HEAP_FLAG_NONE,
&resDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(buffer)));
D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc;
cbvDesc.BufferLocation = (*buffer)->GetGPUVirtualAddress();
cbvDesc.SizeInBytes = (dataSize + 255) & ~255;
auto cbvHandle = m_cbvHeap->GetCPUDescriptorHandleForHeapStart();
cbvHandle.ptr += index * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
m_device->CreateConstantBufferView(&cbvDesc, cbvHandle);
D3D12_RANGE readRange;
readRange.Begin = 0;
readRange.End = 0;
ThrowIfFailed((*buffer)->Map(0, &readRange, mappedPtr));
}
AllocateConstantBuffer(0, sizeof(m_value0), &m_value0Resource, reinterpret_cast<void**>&m_constPtrvalue0));
AllocateConstantBuffer(1, sizeof(m_value1), &m_value1Resource, reinterpret_cast<void**>&m_constPtrvalue1));
The problem is when I want to feed them to the pipeline. When rendering, I used
auto cbvHandle = m_cbvHeap->GetGPUDescriptorHandleForHeapStart();
m_commandList->SetGraphicsRootDescriptorTable(0, cbvHandle);
The result I get is only register(b0) got the the correct value, and register(b1) remains uninitialized. What did I do wrong?
OK I got it to work. Turned out I need to change the shader a bit:
cbuffer a : register(b0) { float value0; }
cbuffer b : register(b1) { float value1; };
This gave me another question though, according to this link:
https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-constants
the buffer names a and b should be optional, but when I tried that the shaders cannot compile. I guess that is a different question.
I've just started up using Direct compute in an attempt to move a fluid simulation I have been working on, onto the GPU. I have found a very similar (if not identical) question here however seems the resolution to my problem is not the same as theirs; I do have my CopyResource the right way round for sure! As with the pasted question, I only get a buffer filled with 0's when copy back from the GPU. I really can't see the error as I don't understand how I can be reaching out of bounds limits. I'm going to apologise for the mass amount of code pasting about to occur but I want be sure I've not got any of the setup wrong.
Output Buffer, UAV and System Buffer set up
outputDesc.Usage = D3D11_USAGE_DEFAULT;
outputDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
outputDesc.ByteWidth = sizeof(BoundaryConditions) * numElements;
outputDesc.CPUAccessFlags = 0;
outputDesc.StructureByteStride = sizeof(BoundaryConditions);
outputDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
result =_device->CreateBuffer(&outputDesc, 0, &m_outputBuffer);
outputDesc.Usage = D3D11_USAGE_STAGING;
outputDesc.BindFlags = 0;
outputDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
result = _device->CreateBuffer(&outputDesc, 0, &m_outputresult);
D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc;
uavDesc.Format = DXGI_FORMAT_UNKNOWN;
uavDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
uavDesc.Buffer.FirstElement = 0;
uavDesc.Buffer.Flags = 0;
uavDesc.Buffer.NumElements = numElements;
result =_device->CreateUnorderedAccessView(m_outputBuffer, &uavDesc, &m_BoundaryConditionsUAV);
Running the Shader in my frame loop
HRESULT result;
D3D11_MAPPED_SUBRESOURCE mappedResource;
_deviceContext->CSSetShader(m_BoundaryConditionsCS, nullptr, 0);
_deviceContext->CSSetUnorderedAccessViews(0, 1, &m_BoundaryConditionsUAV, 0);
_deviceContext->Dispatch(1, 1, 1);
// Unbind output from compute shader
ID3D11UnorderedAccessView* nullUAV[] = { NULL };
_deviceContext->CSSetUnorderedAccessViews(0, 1, nullUAV, 0);
// Disable Compute Shader
_deviceContext->CSSetShader(nullptr, nullptr, 0);
_deviceContext->CopyResource(m_outputresult, m_outputBuffer);
D3D11_MAPPED_SUBRESOURCE mappedData;
result = _deviceContext->Map(m_outputresult, 0, D3D11_MAP_READ, 0, &mappedData);
BoundaryConditions* newbc = reinterpret_cast<BoundaryConditions*>(mappedData.pData);
for (int i = 0; i < 4; i++)
{
Debug::Instance()->Log(newbc[i].x.x);
}
_deviceContext->Unmap(m_outputresult, 0);
HLSL
struct BoundaryConditions
{
float3 x;
float3 y;
};
RWStructuredBuffer<BoundaryConditions> _boundaryConditions;
[numthreads(4, 1, 1)]
void ComputeBoundaryConditions(int3 id : SV_DispatchThreadID)
{
_boundaryConditions[id.x].x = float3(id.x,id.y,id.z);
}
I dispatch the Compute shader after I begin a frame and before I end the frame. I have played around with moving the shaders dispatch call outside of the end scene and before the present ect but nothing seems to effect the process. Can't seem to figure this one out!
Holy Smokes I fixed the error! I was creating the compute shader to a different ID3D11ComputeShader pointer! D: Works like a charm! Pheew Sorry and thanks Adam!