Related
I am attempting to make a game engine. Currently I am trying to implement an HDR skybox that also has mipmaps, but I have been stuck on an error for a while. The example runs completely fine when loading non-HDR, 8-bit colour textures. But the moment I attempt to use VK_FORMAT_R32G32B32A32_SFLOAT (which I'm fairly sure is the correct format to correspond to the 4-byte floats loaded by stbi), the command buffer never completes and seems to stay in the pending state forever. I have tried giving the texture manager its own command buffer, with no luck, and I have tried both vkQueueWaitIdle and fences, but they both return VK_SUCCESS. The validation layers then report an error once vkResetCommandBuffer is invoked, because the command buffer is still in the pending state. It occasionally works if I click to focus the console; more errors appear afterwards, though that was not always the case, and HDR seemed to be working once it loaded, but that happened only about one time in three; mostly the same error was thrown.
This is the code that is used to load the cubemaps in:
/**
 * Builds a sampled, mip-mapped cube map from six image files.
 *
 * Steps performed, in order:
 *  1. Load the six faces named in cubemapInfo.directories with stb_image
 *     (8-bit, 32-bit float, or 32-bit float converted to float16, depending
 *     on cubemapInfo.format).
 *  2. Create a cube-compatible device-local image plus a host-visible staging
 *     buffer holding the six faces back to back.
 *  3. Record and submit, on the texture manager's command buffer, the upload
 *     of mip level 0, a chain of blits generating the remaining mip levels,
 *     and the transition of every level to SHADER_READ_ONLY_OPTIMAL.
 *  4. Create the cube image view and sampler, wait for the graphics queue to
 *     go idle, reset the command buffer and release the staging resources.
 *
 * @param cubemapInfo Target Vulkan format and the six face file paths.
 *
 * NOTE(review): width/height are overwritten by every face load, so all six
 * faces are assumed to share the same dimensions - confirm against callers.
 */
Cubemap::Cubemap(CubemapInfo cubemapInfo)
{
    RenderSystem& renderSystem = RenderSystem::instance();
    TextureManager& textureManager = TextureManager::instance();

    // The query is made outside assert() so formatProperties is still filled in
    // when NDEBUG strips assertions, and it uses the same create flags as the
    // image created below so the reported limits apply to that image.
    VkImageFormatProperties formatProperties;
    VkResult result = vkGetPhysicalDeviceImageFormatProperties(renderSystem.mPhysicalDevice, cubemapInfo.format, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT, &formatProperties);
    // A zero VkResult means success.
    assert(("[ERROR] Unsupported texture format", !result));

    FormatInfo formatInfo = getFormatInfo(cubemapInfo.format);

#pragma region Create cubemap resources
    stbi_set_flip_vertically_on_load(true);

    void* textureData[6] = {};

    // Load images
    int width = 0, height = 0, channels = 0;
    bool hdr = cubemapInfo.format == VK_FORMAT_R16_SFLOAT || cubemapInfo.format == VK_FORMAT_R16G16_SFLOAT || cubemapInfo.format == VK_FORMAT_R16G16B16_SFLOAT || cubemapInfo.format == VK_FORMAT_R16G16B16A16_SFLOAT || cubemapInfo.format == VK_FORMAT_R32_SFLOAT || cubemapInfo.format == VK_FORMAT_R32G32_SFLOAT || cubemapInfo.format == VK_FORMAT_R32G32B32_SFLOAT || cubemapInfo.format == VK_FORMAT_R32G32B32A32_SFLOAT;
    // Half-float faces are allocated with new[] below and must be released with delete[].
    const bool halfFloat = hdr && formatInfo.bytesPerChannel == 2;

    if (hdr)
    {
        if (formatInfo.bytesPerChannel == 4)
        {
            for (unsigned int i = 0; i < 6; i++)
            {
                textureData[i] = stbi_loadf(cubemapInfo.directories[i].c_str(), &width, &height, &channels, formatInfo.nChannels);
                assert(("[ERROR] Failed to load cubemap face", textureData[i] != nullptr));
            }
        }
        else if (formatInfo.bytesPerChannel == 2)
        {
            for (unsigned int i = 0; i < 6; i++)
            {
                float* data = stbi_loadf(cubemapInfo.directories[i].c_str(), &width, &height, &channels, formatInfo.nChannels);
                assert(("[ERROR] Failed to load cubemap face", data != nullptr));

                // stb only produces 32-bit floats; convert each channel value to float16.
                unsigned long long dataSize = static_cast<unsigned long long>(width) * height * formatInfo.nChannels;
                textureData[i] = new float16[dataSize];
                for (unsigned long long j = 0; j < dataSize; j++)
                {
                    ((float16*)textureData[i])[j] = floatToFloat16(data[j]);
                }
                stbi_image_free((void*)data);
            }
        }
        else
        {
            // No loader exists for other channel sizes; textureData would stay empty.
            assert(("[ERROR] Unsupported HDR channel size", false));
        }
    }
    else
    {
        for (unsigned int i = 0; i < 6; i++)
        {
            textureData[i] = stbi_load(cubemapInfo.directories[i].c_str(), &width, &height, &channels, formatInfo.nChannels);
            assert(("[ERROR] Failed to load cubemap face", textureData[i] != nullptr));
        }
    }

    // Size in bytes of mip level 0 across all six faces (this is what the staging buffer holds).
    const VkDeviceSize imageSize = 6 * VkDeviceSize(width) * height * formatInfo.nChannels * formatInfo.bytesPerChannel;
    // Full mip chain down to 1x1 along the larger dimension.
    unsigned int nMips = static_cast<unsigned int>(std::floor(std::log2(width > height ? width : height))) + 1;

    // Check the limits against what is actually requested: nMips levels and 6 layers.
    assert(("[ERROR] Unsupported texture format", formatProperties.maxExtent.width >= static_cast<unsigned int>(width) && formatProperties.maxExtent.height >= static_cast<unsigned int>(height) && formatProperties.maxExtent.depth >= 1 && formatProperties.maxMipLevels >= nMips && formatProperties.maxArrayLayers >= 6 && (formatProperties.sampleCounts & VK_SAMPLE_COUNT_1_BIT) && formatProperties.maxResourceSize >= imageSize));

    // Create image
    VkImageCreateInfo imageCreateInfo = {};
    imageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
    imageCreateInfo.pNext = nullptr;
    imageCreateInfo.flags = VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
    imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
    imageCreateInfo.format = cubemapInfo.format;
    imageCreateInfo.extent = { static_cast<unsigned int>(width), static_cast<unsigned int>(height), 1 };
    imageCreateInfo.mipLevels = nMips;
    imageCreateInfo.arrayLayers = 6;
    imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
    imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
    imageCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
    imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    imageCreateInfo.queueFamilyIndexCount = 0;
    imageCreateInfo.pQueueFamilyIndices = nullptr;
    imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;

    result = vkCreateImage(renderSystem.mDevice, &imageCreateInfo, nullptr, &mImage);
    validateResult(result);

    VkMemoryRequirements memoryRequirements;
    vkGetImageMemoryRequirements(renderSystem.mDevice, mImage, &memoryRequirements);

    // NOTE(review): memoryRequirements.memoryTypeBits is not passed to memoryTypeFromProperties;
    // confirm the helper cannot pick a memory type this resource does not allow.
    VkMemoryAllocateInfo memoryAllocateInfo = {};
    memoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    memoryAllocateInfo.allocationSize = memoryRequirements.size;
    memoryAllocateInfo.memoryTypeIndex = memoryTypeFromProperties(renderSystem.mPhysicalDeviceMemoryProperties, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);

    result = vkAllocateMemory(renderSystem.mDevice, &memoryAllocateInfo, nullptr, &mImageMemory);
    validateResult(result);

    result = vkBindImageMemory(renderSystem.mDevice, mImage, mImageMemory, 0);
    validateResult(result);

    // Create staging buffer
    VkBuffer stagingBuffer;
    VkDeviceMemory stagingMemory;

    VkBufferCreateInfo bufferCreateInfo = {};
    bufferCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    bufferCreateInfo.pNext = nullptr;
    bufferCreateInfo.flags = 0;
    bufferCreateInfo.size = imageSize;
    bufferCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
    bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    bufferCreateInfo.queueFamilyIndexCount = 0;
    bufferCreateInfo.pQueueFamilyIndices = nullptr;

    result = vkCreateBuffer(renderSystem.mDevice, &bufferCreateInfo, nullptr, &stagingBuffer);
    validateResult(result);

    vkGetBufferMemoryRequirements(renderSystem.mDevice, stagingBuffer, &memoryRequirements);

    memoryAllocateInfo.allocationSize = memoryRequirements.size;
    memoryAllocateInfo.memoryTypeIndex = memoryTypeFromProperties(renderSystem.mPhysicalDeviceMemoryProperties, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

    result = vkAllocateMemory(renderSystem.mDevice, &memoryAllocateInfo, nullptr, &stagingMemory);
    validateResult(result);

    result = vkBindBufferMemory(renderSystem.mDevice, stagingBuffer, stagingMemory, 0);
    validateResult(result);

    unsigned char* data;
    result = vkMapMemory(renderSystem.mDevice, stagingMemory, 0, imageSize, 0, (void**)&data);
    validateResult(result);

    // Pack the faces back to back; face i starts at byte offset i * dataLayer.
    unsigned long long dataLayer = static_cast<unsigned long long>(width) * height * formatInfo.nChannels * formatInfo.bytesPerChannel;
    for (unsigned int i = 0; i < 6; i++)
    {
        memcpy((void*)(data + i * dataLayer), textureData[i], dataLayer);

        // Release each face with the deallocator matching its allocator.
        if (halfFloat)
            delete[] static_cast<float16*>(textureData[i]);
        else
            stbi_image_free(textureData[i]);
    }

    vkUnmapMemory(renderSystem.mDevice, stagingMemory);

    result = vkBeginCommandBuffer(textureManager.mCommandBuffer, &renderSystem.mCommandBufferBeginInfo);
    validateResult(result);

    // Move every mip level of every face from UNDEFINED to TRANSFER_DST_OPTIMAL.
    VkImageMemoryBarrier barrier = {};
    barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    barrier.pNext = nullptr;
    barrier.srcAccessMask = 0;
    barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT /* Additional >> */ | VK_ACCESS_TRANSFER_READ_BIT;
    barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.image = mImage;
    barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    barrier.subresourceRange.baseMipLevel = 0;
    barrier.subresourceRange.levelCount = nMips;
    barrier.subresourceRange.baseArrayLayer = 0;
    barrier.subresourceRange.layerCount = 6;

    vkCmdPipelineBarrier(textureManager.mCommandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier);

    // Upload all six faces of mip level 0 in a single copy.
    VkBufferImageCopy copyRegion = {};
    copyRegion.bufferOffset = 0;
    copyRegion.bufferRowLength = 0;
    copyRegion.bufferImageHeight = 0;
    copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    copyRegion.imageSubresource.mipLevel = 0;
    copyRegion.imageSubresource.baseArrayLayer = 0;
    copyRegion.imageSubresource.layerCount = 6;
    copyRegion.imageOffset = { 0, 0, 0 };
    copyRegion.imageExtent = { static_cast<unsigned int>(width), static_cast<unsigned int>(height), 1 };

    vkCmdCopyBufferToImage(textureManager.mCommandBuffer, stagingBuffer, mImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copyRegion);

    // From here on the barrier is reused one mip level at a time (levelCount = 1).
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
    barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
    barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
    barrier.subresourceRange.levelCount = 1;

    VkImageBlit imageBlit = {};
    imageBlit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    imageBlit.srcSubresource.baseArrayLayer = 0;
    imageBlit.srcSubresource.layerCount = 6;
    imageBlit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    imageBlit.dstSubresource.baseArrayLayer = 0;
    imageBlit.dstSubresource.layerCount = 6;

    // Generate level i by blitting from level i - 1, halving each dimension (never below 1).
    unsigned int mipWidth = width, mipHeight = height;
    for (unsigned int i = 1; i < nMips; i++)
    {
        // Level i - 1 has just been written; make it readable as the blit source.
        barrier.subresourceRange.baseMipLevel = i - 1;
        vkCmdPipelineBarrier(textureManager.mCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier);

        imageBlit.srcSubresource.mipLevel = i - 1;
        imageBlit.srcOffsets[0] = { 0, 0, 0 };
        imageBlit.srcOffsets[1] = { int(mipWidth), int(mipHeight), 1 };
        imageBlit.dstSubresource.mipLevel = i;

        if (mipWidth > 1)
            mipWidth /= 2;
        if (mipHeight > 1)
            mipHeight /= 2;

        imageBlit.dstOffsets[0] = { 0, 0, 0 };
        imageBlit.dstOffsets[1] = { int(mipWidth), int(mipHeight), 1 };

        vkCmdBlitImage(textureManager.mCommandBuffer, mImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, mImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &imageBlit, VK_FILTER_LINEAR);
    }

    // Hand every level to the fragment shader. Only the last level was never used
    // as a blit source, so it alone is still in TRANSFER_DST_OPTIMAL.
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
    barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
    for (unsigned int i = 0; i < nMips; i++)
    {
        barrier.oldLayout = i == nMips - 1 ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
        barrier.subresourceRange.baseMipLevel = i;
        vkCmdPipelineBarrier(textureManager.mCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier);
    }

    result = vkEndCommandBuffer(textureManager.mCommandBuffer);
    validateResult(result);

    VkSubmitInfo submitInfo = {};
    submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submitInfo.pNext = nullptr;
    submitInfo.waitSemaphoreCount = 0;
    submitInfo.pWaitSemaphores = nullptr;
    submitInfo.pWaitDstStageMask = nullptr;
    submitInfo.commandBufferCount = 1;
    submitInfo.pCommandBuffers = &textureManager.mCommandBuffer;
    submitInfo.signalSemaphoreCount = 0;
    submitInfo.pSignalSemaphores = nullptr;

    // No fence is passed; completion is awaited with vkQueueWaitIdle further down.
    result = vkQueueSubmit(renderSystem.mGraphicsQueue, 1, &submitInfo, NULL);
    validateResult(result);

    // Create image view
    VkImageViewCreateInfo imageViewCreateInfo = {};
    imageViewCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
    imageViewCreateInfo.pNext = nullptr;
    imageViewCreateInfo.flags = 0;
    imageViewCreateInfo.image = mImage;
    imageViewCreateInfo.viewType = VK_IMAGE_VIEW_TYPE_CUBE;
    imageViewCreateInfo.format = cubemapInfo.format;
    imageViewCreateInfo.components = formatInfo.componentMapping;
    imageViewCreateInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    imageViewCreateInfo.subresourceRange.baseMipLevel = 0;
    imageViewCreateInfo.subresourceRange.levelCount = nMips;
    imageViewCreateInfo.subresourceRange.baseArrayLayer = 0;
    imageViewCreateInfo.subresourceRange.layerCount = 6;

    result = vkCreateImageView(renderSystem.mDevice, &imageViewCreateInfo, nullptr, &mImageView);
    validateResult(result);

    // Create sampler
    VkSamplerCreateInfo samplerCreateInfo = {};
    samplerCreateInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
    samplerCreateInfo.pNext = nullptr;
    samplerCreateInfo.flags = 0;
    samplerCreateInfo.magFilter = VK_FILTER_LINEAR;
    samplerCreateInfo.minFilter = VK_FILTER_LINEAR;
    samplerCreateInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
    samplerCreateInfo.mipLodBias = 0.0f;
    samplerCreateInfo.minLod = 0.0f;
    samplerCreateInfo.maxLod = float(nMips);
    samplerCreateInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
    samplerCreateInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
    samplerCreateInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
    samplerCreateInfo.anisotropyEnable = VK_TRUE;
    samplerCreateInfo.maxAnisotropy = renderSystem.mPhysicalDeviceProperties.limits.maxSamplerAnisotropy;
    samplerCreateInfo.compareEnable = VK_FALSE;
    samplerCreateInfo.compareOp = VK_COMPARE_OP_ALWAYS;
    samplerCreateInfo.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK;
    samplerCreateInfo.unnormalizedCoordinates = VK_FALSE;

    result = vkCreateSampler(renderSystem.mDevice, &samplerCreateInfo, nullptr, &mSampler);
    validateResult(result);

    // The upload must have finished before the command buffer is reset and
    // the staging buffer it reads from is destroyed.
    result = vkQueueWaitIdle(renderSystem.mGraphicsQueue);
    validateResult(result);

    result = vkResetCommandBuffer(textureManager.mCommandBuffer, 0);
    validateResult(result);

    vkDestroyBuffer(renderSystem.mDevice, stagingBuffer, nullptr);
    vkFreeMemory(renderSystem.mDevice, stagingMemory, nullptr);
#pragma endregion
}
Exact errors that occur:
VUID-vkResetCommandBuffer-commandBuffer-00045(ERROR / SPEC): msgNum: 511214570 - Validation Error: [ VUID-vkResetCommandBuffer-commandBuffer-00045 ] Object 0: handle = 0x19323492138, type = VK_OBJECT_TYPE_COMMAND_BUFFER; | MessageID = 0x1e7883ea | Attempt to reset VkCommandBuffer 0x19323492138[] which is in use. The Vulkan spec states: commandBuffer must not be in the pending state (https://vulkan.lunarg.com/doc/view/1.2.162.1/windows/1.2-extensions/vkspec.html#VUID-vkResetCommandBuffer-commandBuffer-00045)
Objects: 1
[0] 0x19323492138, type: 6, name: NULL
UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout(ERROR / SPEC): msgNum: 1303270965 - Validation Error: [ UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout ] Object 0: handle = 0x19323492138, type = VK_OBJECT_TYPE_COMMAND_BUFFER; | MessageID = 0x4dae5635 | Submitted command buffer expects VkImage 0x5fb0e800000000cd[] (subresource: aspectMask 0x1 array layer 0, mip level 0) to be in layout VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL--instead, current layout is VK_IMAGE_LAYOUT_UNDEFINED.
Objects: 1
[0] 0x19323492138, type: 6, name: NULL
(^ This error is thrown continuously for mip levels 0-12 ^)
View full source:
https://github.com/finnbuhse/Vulkan-Engine-V1.0
The assets and shader binaries are not on GitHub, however, so if you wish to try compiling and running the source, compile the shader sources into files with names identical to those found in mesh.cpp line 1083, and adjust main.cpp to include custom models.
Any clue as to why this might be happening would be greatly appreciated
So, after almost an entire year of agonising over this one error... I found that what seems to have happened is that the GPU ran out of memory: the skybox I picked initially was simply too large. Each face was 4K, and I found the whole cubemap needed over a gigabyte of video memory, which, on a mere NVIDIA GTX 1050 Ti, is a quarter of the total. I did think this was possible early on, which is why I validated every VkResult I could, thinking that if this happened VK_ERROR_OUT_OF_DEVICE_MEMORY would be returned. However, as far as the results could tell, there was nothing but success. Perhaps it wasn't so much a lack of video memory as the GPU having a hard time mip-mapping such a large image. Either way, with a different (1K) HDR skybox it works perfectly fine with both 16-bit and 32-bit floating-point images.
When resizing the images of the swapchain (because the window size changed), I am getting some white blinking. I don't really understand what the source of this issue is.
I have this issue only when using the VK_PRESENT_MODE_FIFO_KHR present mode with my Intel(R) UHD Graphics 630 integrated GPU; I don't have this issue with the GeForce GTX 1050. I find it really curious that Vulkan behaves differently depending on the GPU.
Maybe the ideal solution for what I am trying to achieve is to have a swapchain that always has the size of the screen and to blit only the visible part, if it is possible to do that?
Here is my swapchain resize code (far from optimal as I redo some operations that can be avoided).
// Destroys the current swapchain resources and rebuilds them for the current
// surface size: swapchain, one image view per swapchain image, and a
// D32_SFLOAT depth buffer (image, memory, view). Every scene in
// renderer->scenes is then notified of the new extent via swapchain_resized().
//
// renderer: renderer whose swapchain, swapchain_buffers, depth_buffer,
//           surface_format and swapchain_extent are rewritten. Must not be null.
// window:   only window->size is read, and only when the surface reports an
//           undefined current extent.
//
// Returns true on success. On any failure an error is logged,
// clear_swapchain(renderer) is called and false is returned.
bool resize_swapchain(VK_Renderer* renderer, Window* window) {
    assert(renderer);

    VkResult res;

    clear_swapchain(renderer);

    // Build the swapchain

    // Get the list of VkFormats that are supported:
    get_enumeration(vkGetPhysicalDeviceSurfaceFormatsKHR,
                    VkSurfaceFormatKHR,
                    surface_formats,
                    "Failed to get physical device surface formats.\n",
                    "Found %d surface formats.\n",
                    renderer->physical_device,
                    renderer->surface);

    // If the format list includes just one entry of VK_FORMAT_UNDEFINED,
    // the surface has no preferred format. Otherwise, at least one
    // supported format will be returned.
    if (surface_formats.size() == 1 && surface_formats[0].format == VK_FORMAT_UNDEFINED) {
        renderer->surface_format = VK_FORMAT_B8G8R8A8_UNORM;
    } else {
        renderer->surface_format = surface_formats[0].format;
    }

    VkSurfaceCapabilitiesKHR surface_capabilities;

    res = vkGetPhysicalDeviceSurfaceCapabilitiesKHR(renderer->physical_device, renderer->surface, &surface_capabilities);
    if (res != VK_SUCCESS) {
        log(globals.logger, Log_Level::error, "Failed to get physical device surface capabilities.\n");
        clear_swapchain(renderer);
        return false;
    }

    get_enumeration(vkGetPhysicalDeviceSurfacePresentModesKHR,
                    VkPresentModeKHR,
                    present_modes,
                    "Failed to get physical device surface present modes.\n",
                    "Found %d present modes.\n",
                    renderer->physical_device,
                    renderer->surface);

    // width and height are either both 0xFFFFFFFF, or both not 0xFFFFFFFF.
    if (surface_capabilities.currentExtent.width == 0xFFFFFFFF) {
        // If the surface size is undefined, the size is set to
        // the size of the images requested.
        renderer->swapchain_extent.width = window->size.x;
        renderer->swapchain_extent.height = window->size.y;
        if (renderer->swapchain_extent.width < surface_capabilities.minImageExtent.width) {
            renderer->swapchain_extent.width = surface_capabilities.minImageExtent.width;
        } else if (renderer->swapchain_extent.width > surface_capabilities.maxImageExtent.width) {
            renderer->swapchain_extent.width = surface_capabilities.maxImageExtent.width;
        }

        if (renderer->swapchain_extent.height < surface_capabilities.minImageExtent.height) {
            renderer->swapchain_extent.height = surface_capabilities.minImageExtent.height;
        } else if (renderer->swapchain_extent.height > surface_capabilities.maxImageExtent.height) {
            renderer->swapchain_extent.height = surface_capabilities.maxImageExtent.height;
        }
    } else {
        // If the surface size is defined, the swap chain size must match
        renderer->swapchain_extent = surface_capabilities.currentExtent;
    }

    // The FIFO present mode is guaranteed by the spec to be supported
#if defined(FL_PROFILING_MODE)
    VkPresentModeKHR swapchain_present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR;
#else
    VkPresentModeKHR swapchain_present_mode = VK_PRESENT_MODE_FIFO_KHR;
#endif

    // Determine the number of VkImage's to use in the swap chain.
    // We need to acquire only 1 presentable image at at time.
    // Asking for minImageCount images ensures that we can acquire
    // 1 presentable image as long as we present it before attempting
    // to acquire another.
    uint32_t desired_number_of_swapchain_images = surface_capabilities.minImageCount;

    VkSurfaceTransformFlagBitsKHR surface_transform;
    if (surface_capabilities.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR) {
        surface_transform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
    } else {
        surface_transform = surface_capabilities.currentTransform;
    }

    // Find a supported composite alpha mode - one of these is guaranteed to be set
    VkCompositeAlphaFlagBitsKHR composite_alpha_flag = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;

    // TODO change the order if we want to be able to blend the window of our application with the Windows Desktop
    VkCompositeAlphaFlagBitsKHR composite_alpha_flags[4] = {
        VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
        VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR,
        VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR,
        VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR,
    };

    // Iterate over the element count, not sizeof(array): the byte size would
    // let the loop index past the end of the 4-entry table.
    const uint32_t composite_alpha_flag_count = sizeof(composite_alpha_flags) / sizeof(composite_alpha_flags[0]);
    for (uint32_t i = 0; i < composite_alpha_flag_count; i++) {
        if (surface_capabilities.supportedCompositeAlpha & composite_alpha_flags[i]) {
            composite_alpha_flag = composite_alpha_flags[i];
            break;
        }
    }

    VkSwapchainCreateInfoKHR swapchain_info = {};
    swapchain_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
    swapchain_info.pNext = nullptr;
    swapchain_info.surface = renderer->surface;
    swapchain_info.minImageCount = desired_number_of_swapchain_images;
    swapchain_info.imageFormat = renderer->surface_format;
    swapchain_info.imageExtent.width = renderer->swapchain_extent.width;
    swapchain_info.imageExtent.height = renderer->swapchain_extent.height;
    swapchain_info.preTransform = surface_transform;
    swapchain_info.compositeAlpha = composite_alpha_flag;
    swapchain_info.imageArrayLayers = 1;
    swapchain_info.presentMode = swapchain_present_mode;
    swapchain_info.oldSwapchain = nullptr;
    swapchain_info.clipped = true;
    swapchain_info.imageColorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR;
    swapchain_info.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
    swapchain_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
    swapchain_info.queueFamilyIndexCount = 0;
    swapchain_info.pQueueFamilyIndices = nullptr;

    uint32_t queue_family_indices[2] = {(uint32_t)renderer->graphics_queue_family_index, (uint32_t)renderer->present_queue_family_index};
    if (renderer->graphics_queue_family_index != renderer->present_queue_family_index) {
        // If the graphics and present queues are from different queue families,
        // we either have to explicitly transfer ownership of images between
        // the queues, or we have to create the swapchain with imageSharingMode
        // as VK_SHARING_MODE_CONCURRENT
        swapchain_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
        swapchain_info.queueFamilyIndexCount = 2;
        swapchain_info.pQueueFamilyIndices = queue_family_indices;
        // TODO #Speedup We may want optimize this by using VK_SHARING_MODE_EXCLUSIVE and be explicit about transfert ownership
    }

    res = vkCreateSwapchainKHR(renderer->device, &swapchain_info, nullptr, &renderer->swapchain);
    if (res != VK_SUCCESS) {
        log(globals.logger, Log_Level::error, "Failed to create the swapchain.\n");
        clear_swapchain(renderer);
        return false;
    }
    log(globals.logger, Log_Level::verbose, "Swapchain created with size (%d, %d).\n",
        swapchain_info.imageExtent.width,
        swapchain_info.imageExtent.height);

    get_enumeration(vkGetSwapchainImagesKHR,
                    VkImage,
                    swapchain_images,
                    "Failed to get swapchain images.\n",
                    "Found %d swapchain images.\n",
                    renderer->device,
                    renderer->swapchain);

    renderer->swapchain_buffers.resize(swapchain_images.size());
    for (uint32_t i = 0; i < swapchain_images.size(); i++) {
        renderer->swapchain_buffers[i].image = swapchain_images[i];
    }

    // One 2D colour view per swapchain image.
    for (uint32_t i = 0; i < swapchain_images.size(); i++) {
        VkImageViewCreateInfo color_image_view = {};
        color_image_view.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
        color_image_view.pNext = nullptr;
        color_image_view.flags = 0;
        color_image_view.image = renderer->swapchain_buffers[i].image;
        color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D;
        color_image_view.format = renderer->surface_format;
        color_image_view.components.r = VK_COMPONENT_SWIZZLE_R;
        color_image_view.components.g = VK_COMPONENT_SWIZZLE_G;
        color_image_view.components.b = VK_COMPONENT_SWIZZLE_B;
        color_image_view.components.a = VK_COMPONENT_SWIZZLE_A;
        color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
        color_image_view.subresourceRange.baseMipLevel = 0;
        color_image_view.subresourceRange.levelCount = 1;
        color_image_view.subresourceRange.baseArrayLayer = 0;
        color_image_view.subresourceRange.layerCount = 1;

        res = vkCreateImageView(renderer->device, &color_image_view, nullptr, &renderer->swapchain_buffers[i].view);
        if (res != VK_SUCCESS) {
            log(globals.logger, Log_Level::error, "Failed to create image view.\n");
            clear_swapchain(renderer);
            return false;
        }
        log(globals.logger, Log_Level::verbose, "Image view %d created.\n", i);
    }

    // Build the depth buffer

    VkImageCreateInfo image_info = {};
    const VkFormat depth_format = VK_FORMAT_D32_SFLOAT;
    VkFormatProperties format_properties;
    bool found_memory_type_index;

    // NOTE(review): linear tiling is tried before optimal tiling here; confirm
    // that this preference order is intended for a depth attachment.
    vkGetPhysicalDeviceFormatProperties(renderer->physical_device, depth_format, &format_properties);
    if (format_properties.linearTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) {
        image_info.tiling = VK_IMAGE_TILING_LINEAR;
    } else if (format_properties.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) {
        image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
    } else {
        // #TODO choose an other format?
        log(globals.logger, Log_Level::error, "VK_FORMAT_D32_SFLOAT Unsupported.\n");
        clear_swapchain(renderer);
        return false;
    }

    image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
    image_info.pNext = nullptr;
    image_info.imageType = VK_IMAGE_TYPE_2D;
    image_info.format = depth_format;
    image_info.extent.width = renderer->swapchain_extent.width;
    image_info.extent.height = renderer->swapchain_extent.height;
    image_info.extent.depth = 1;
    image_info.mipLevels = 1;
    image_info.arrayLayers = 1;
    image_info.samples = renderer->sample_count_flag;
    image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
    image_info.queueFamilyIndexCount = 0;
    image_info.pQueueFamilyIndices = nullptr;
    image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    image_info.flags = 0;

    // allocationSize and memoryTypeIndex are filled in once the image's
    // memory requirements are known.
    VkMemoryAllocateInfo memory_allocation_info = {};
    memory_allocation_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    memory_allocation_info.pNext = nullptr;
    memory_allocation_info.allocationSize = 0;
    memory_allocation_info.memoryTypeIndex = 0;

    // view_info.image is set after the depth image has been created and bound.
    VkImageViewCreateInfo view_info = {};
    view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
    view_info.pNext = nullptr;
    view_info.image = nullptr;
    view_info.format = depth_format;
    view_info.components.r = VK_COMPONENT_SWIZZLE_R;
    view_info.components.g = VK_COMPONENT_SWIZZLE_G;
    view_info.components.b = VK_COMPONENT_SWIZZLE_B;
    view_info.components.a = VK_COMPONENT_SWIZZLE_A;
    view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
    view_info.subresourceRange.baseMipLevel = 0;
    view_info.subresourceRange.levelCount = 1;
    view_info.subresourceRange.baseArrayLayer = 0;
    view_info.subresourceRange.layerCount = 1;
    view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
    view_info.flags = 0;

    VkMemoryRequirements memory_requirements;

    renderer->depth_buffer.format = depth_format;

    /* Create image */
    res = vkCreateImage(renderer->device, &image_info, nullptr, &renderer->depth_buffer.image);
    if (res != VK_SUCCESS) {
        log(globals.logger, Log_Level::error, "Failed to create the depth image.\n");
        clear_swapchain(renderer);
        return false;
    }

    vkGetImageMemoryRequirements(renderer->device, renderer->depth_buffer.image, &memory_requirements);

    memory_allocation_info.allocationSize = memory_requirements.size;
    /* Use the memory properties to determine the type of memory required */
    found_memory_type_index = memory_type_from_properties(renderer, memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &memory_allocation_info.memoryTypeIndex);
    if (!found_memory_type_index) {
        log(globals.logger, Log_Level::error, "Failed to find memory type to allocate the depth image.\n");
        clear_swapchain(renderer);
        return false;
    }

    /* Allocate memory */
    res = vkAllocateMemory(renderer->device, &memory_allocation_info, nullptr, &renderer->depth_buffer.memory);
    if (res != VK_SUCCESS) {
        log(globals.logger, Log_Level::error, "Failed to create memory for depth image.\n");
        clear_swapchain(renderer);
        return false;
    }

    /* Bind memory */
    res = vkBindImageMemory(renderer->device, renderer->depth_buffer.image, renderer->depth_buffer.memory, 0);
    if (res != VK_SUCCESS) {
        log(globals.logger, Log_Level::error, "Failed to bind the depth image memory.\n");
        clear_swapchain(renderer);
        return false;
    }

    /* Create image view */
    view_info.image = renderer->depth_buffer.image;
    res = vkCreateImageView(renderer->device, &view_info, nullptr, &renderer->depth_buffer.view);
    if (res != VK_SUCCESS) {
        log(globals.logger, Log_Level::error, "Failed to create the depth image view.\n");
        clear_swapchain(renderer);
        return false;
    }

    log(globals.logger, Log_Level::verbose, "Depth buffer created.\n");

    // Let every scene react to the new extent.
    for (size_t i = 0; i < renderer->scenes.size(); i++) {
        swapchain_resized(renderer->scenes[i], renderer->swapchain_extent.width, renderer->swapchain_extent.height);
    }

    return true;
}
Edit: Maybe my issue is more related to how I submit rendered images to the swapchain, or to the image acquisition.
// Tail of the per-frame render routine: record the mesh draws, close the
// render pass and command buffer, submit to the graphics queue, wait for the
// GPU to finish, then present the acquired swapchain image.
for (size_t i = 0; i < scene->meshes.size(); i++) {
    draw_mesh(scene->meshes[i]);
}

// End the Render pass
vkCmdEndRenderPass(scene->renderer->graphical_command_buffer);

// End command buffer
{
    res = vkEndCommandBuffer(scene->renderer->graphical_command_buffer);
    // Previously this result was overwritten by the submit below without being checked.
    assert(res == VK_SUCCESS);
}

// Execute queue command buffer
{
    /* Queue the command buffer for execution */
    const VkCommandBuffer command_buffers[] = {scene->renderer->graphical_command_buffer};
    VkFenceCreateInfo fence_create_info = {};
    VkFence draw_fence;
    fence_create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
    fence_create_info.pNext = nullptr;
    fence_create_info.flags = 0;
    // A fresh, unsignaled fence is created for this frame and destroyed after presenting.
    res = vkCreateFence(scene->renderer->device, &fence_create_info, nullptr, &draw_fence);
    assert(res == VK_SUCCESS);

    // The submission waits for the acquired image only at the colour-attachment-output stage.
    VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    VkSubmitInfo submit_info[1] = {};
    submit_info[0].pNext = nullptr;
    submit_info[0].sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submit_info[0].waitSemaphoreCount = 1;
    submit_info[0].pWaitSemaphores = &scene->image_acquired_semaphore;
    submit_info[0].pWaitDstStageMask = &pipe_stage_flags;
    submit_info[0].commandBufferCount = 1;
    submit_info[0].pCommandBuffers = command_buffers;
    submit_info[0].signalSemaphoreCount = 0;
    submit_info[0].pSignalSemaphores = nullptr;

    /* Queue the command buffer for execution */
    res = vkQueueSubmit(scene->renderer->graphics_queue, 1, submit_info, draw_fence);
    assert(res == VK_SUCCESS);

    /* Now present the image in the window */
    // No wait semaphore is used for the present: the fence wait below already
    // guarantees that rendering has completed.
    VkPresentInfoKHR present = {};
    present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
    present.pNext = nullptr;
    present.swapchainCount = 1;
    present.pSwapchains = &scene->renderer->swapchain;
    present.pImageIndices = &scene->current_buffer;
    present.pWaitSemaphores = nullptr;
    present.waitSemaphoreCount = 0;
    present.pResults = nullptr;

    /* Make sure command buffer is finished before presenting */
    do {
        res = vkWaitForFences(scene->renderer->device, 1, &draw_fence, VK_TRUE, scene->renderer->draw_fence_timeout_us);
    } while (res == VK_TIMEOUT);
    assert(res == VK_SUCCESS);

    res = vkQueuePresentKHR(scene->renderer->present_queue, &present);
    // While the window is being resized the swapchain may legitimately be
    // reported as suboptimal or out of date; that is a cue to rebuild the
    // swapchain, not a programming error.
    assert(res == VK_SUCCESS || res == VK_SUBOPTIMAL_KHR || res == VK_ERROR_OUT_OF_DATE_KHR);

    vkDestroyFence(scene->renderer->device, draw_fence, nullptr);
}
On vulkan-tutorial.com it is written that we should also recreate the command buffers along with the swapchain (https://vulkan-tutorial.com/Drawing_a_triangle/Swap_chain_recreation); is that really mandatory?
I'm the guy from that reddit thread. I'm not sure 100% if we have the exact same issue, but I can explain what I was dealing with and how I worked around it.
So the issue here has several layers. The first one is that window resizing on Windows blocks the message queue because it needs to capture all the input events for itself. So to work around this you need to make your window update asynchronous, for example through threads.
Now your rendering and window resizing work asynchronously, which is great, until someone resizes the window while you are halfway through rendering the new frame. This IMMEDIATELY causes the swapchain to be VK_ERROR_OUT_OF_DATE_KHR, making you unable to present your render result to the screen. This can cause a variety of different artifacts on the surface, depending on GPU vendor, driver version, and even between different GPUs from the same vendor. This is quite literally undefined behavior. But flickering is definitely one of the common results, where it simply doesn't show anything on the surface until a new successful present. So far I have not found a single vendor that supports VK_SUBOPTIMAL_KHR to allow you to keep rendering.
A naive solution would be to give the window full control over the framerate instead, but would give very poor and inconsistent frame timings, especially when going over 60hz. You want the rendering to run as fast as it can, with as little latency as possible.
So before I go into the solution, let's summarize the requirements:
The application (including rendering) doesn't freeze when resizing.
The window does not resize between Acquire and Present
The frame timings are not controlled by the window message queue*
* When not resizing
You might have noticed the asterisk on the last requirement. This is because we will have to make a small compromise. The idea is that we only let the window take control over the frame timings when it is resizing. Outside of that we can draw as fast as possible, since nothing else can invalidate the swapchain in between.
To do this I used Fibers. You can think of fibers as a stack without a thread. You can then jump from the fiber to a different fiber and back. Remember that the message queue (specifically GetMessage/PeekMessage calls) doesn't return when resizing? Well, you can jump out of that loop and back in using fibers! Combined with a timer that causes the switch, we can synchronously update the window as well as render frames. Here is a sample from my code:
// Window procedure: forwards move/size/minimize/maximize/close notifications
// to the registered callbacks and, while a size/move loop is active, uses a
// timer to hand control back to the main fiber.
// WM_CLOSE is consumed here (returns 0); everything else is also passed on to
// DefWindowProcW.
LRESULT Window::Impl::WndProc(HWND a_HWND, UINT a_Message, WPARAM a_WParam, LPARAM a_LParam)
{
    if (a_Message == WM_ENTERSIZEMOVE)
    {
        // Timer id 0 with a 1 ms period runs for as long as the size/move loop.
        SetTimer(a_HWND, 0, 1, NULL);
    }
    else if (a_Message == WM_EXITSIZEMOVE)
    {
        KillTimer(a_HWND, 0);
    }
    else if (a_Message == WM_TIMER)
    {
        // Each tick switches back to the main fiber.
        m_MainFiber.Switch();
    }
    else if (a_Message == WM_MOVE)
    {
        if (m_MoveCallback)
        {
            m_MoveCallback(m_This, Vector2i(static_cast<int16_t>(LOWORD(a_LParam)), static_cast<int16_t>(HIWORD(a_LParam))));
        }
    }
    else if (a_Message == WM_SIZE)
    {
        // Minimize/maximize notifications come first, then the generic resize.
        if (a_WParam == SIZE_MINIMIZED)
        {
            if (m_MinimizeCallback)
            {
                m_MinimizeCallback(m_This);
            }
        }
        else if (a_WParam == SIZE_MAXIMIZED)
        {
            if (m_MaximizeCallback)
            {
                m_MaximizeCallback(m_This);
            }
        }

        if (m_ResizeCallback)
        {
            m_ResizeCallback(m_This, Vector2i(static_cast<int16_t>(LOWORD(a_LParam)), static_cast<int16_t>(HIWORD(a_LParam))));
        }
    }
    else if (a_Message == WM_CLOSE)
    {
        if (m_CloseCallback)
        {
            m_CloseCallback(m_This);
        }
        // Closing is left to the callback; default processing is skipped.
        return 0;
    }

    return DefWindowProcW(a_HWND, a_Message, a_WParam, a_LParam);
}
As you can see, it is actually pretty simple. Start a timer when resize starts, stop it when resize ends, and switch back to the original fiber when it triggers.
Here is the fiber callback itself:
void Window::Impl::FiberCallback()
{
MSG msg;
for (;;)
{
if (PeekMessageW(&msg, m_Window, 0, 0, PM_REMOVE) != 0)
{
TranslateMessage(&msg);
DispatchMessageW(&msg);
}
else
{
m_MainFiber.Switch();
}
}
}
And then the actual polling is as simple as this:
// Processes window messages by switching to the implementation's message
// fiber; returns once that fiber switches back.
void Window::PollEvents()
{
    auto& messageFiber = m_Impl->m_MessageFiber;
    messageFiber.Switch();
}
This should make PollEvents always return immediately when not resizing, and after the timer expires when you are resizing. It also completely avoids threading since it all runs on the same thread, it just switches between stacks.
Leave a comment if something is unclear, and I hope it solves your problem.
I want to share cdrom device over network.
On the client side I create root enumerated device (scsi bus). On the server side (where cdrom device resides) I replace device stack's FDO with my own (in other words - cdrom.sys is replaced by another driver).
Requests are redirected from client to server using windows sockets.
The format of data transferred over network (from client to server): USER_HEADER, USER_SCSI_REQUEST_BLOCK, [data to be transferred to device]
The format of data transferred over network (from server to client):
USER_HEADER, USER_SCSI_REQUEST_BLOCK, [data to be transferred from device / sense data]
The structures are defined as follows:
// Fixed header that starts every message exchanged between client and server.
// It is followed by a USER_SCSI_REQUEST_BLOCK and/or raw payload bytes.
// NOTE(review): the layout is the wire format - both sides must be built with
// the same packing/alignment; confirm before changing any field.
struct USER_HEADER
{
// Request id; the client assigns it from a per-device counter.
ULONG Id;
// Total message size in bytes, including this header and any payload.
ULONG Length;
// IRP major/minor function codes of the forwarded request.
ULONG MajorFunction;
ULONG MinorFunction;
// Device-control parameters; set to 0 by the client for SRB requests.
ULONG IoControlCode;
ULONG InputBufferLength;
ULONG OutputBufferLength;
// Completion status and byte count, filled in by the server.
NTSTATUS Status;
// NOTE(review): IoStatus.Information is pointer-sized; it is narrowed to 32 bits here.
ULONG Information;
};
// Pointer-free copy of the SCSI_REQUEST_BLOCK fields that are sent over the
// network; each member is copied to/from the SRB member of the same name.
// Data-out bytes (client to server) or data-in / sense bytes (server to
// client) follow this structure directly in the message.
// NOTE(review): the layout is the wire format - keep both sides in sync.
struct USER_SCSI_REQUEST_BLOCK
{
UCHAR Function;
UCHAR SrbStatus;
UCHAR ScsiStatus;
UCHAR PathId;
UCHAR TargetId;
UCHAR Lun;
UCHAR QueueTag;
UCHAR QueueAction;
// Number of bytes copied into Cdb is always sizeof(Cdb); CdbLength is carried as-is.
UCHAR CdbLength;
// Size of the sense data area / returned sense data, in bytes.
UCHAR SenseInfoBufferLength;
ULONG SrbFlags;
// Size of the data transfer in bytes; rewritten by the server on completion.
ULONG DataTransferLength;
ULONG TimeOutValue;
ULONG QueueSortKey;
UCHAR Cdb[16];
};
Client side code to pack and unpack requests sent from cdrom.sys:
// Returns a system-space pointer to the Length bytes at pBuffer, which must
// lie inside the range described by pSourceMdl.
//   pTargetMdl - scratch MDL used to build a partial mapping when needed.
//   pUnmap     - set to TRUE when the caller has to undo the mapping with
//                MmUnmapLockedPages(result, pTargetMdl), FALSE otherwise.
// The result of MmMapLockedPagesSpecifyCache is returned unchecked.
PVOID GetBuffer(MDL *pSourceMdl, MDL *pTargetMdl, PVOID pBuffer, ULONG Length, BOOLEAN *pUnmap)
{
    if ((pSourceMdl->MdlFlags & (MDL_MAPPED_TO_SYSTEM_VA | MDL_SOURCE_IS_NONPAGED_POOL)) == 0)
    {
        // No system mapping yet: map just the requested sub-range.
        PVOID pMapped;
        IoBuildPartialMdl(pSourceMdl, pTargetMdl, pBuffer, Length);
        pMapped = MmMapLockedPagesSpecifyCache(pTargetMdl, KernelMode, MmCached, NULL, FALSE, NormalPagePriority);
        *pUnmap = TRUE;
        return pMapped;
    }

    // Already visible in system space: apply pBuffer's offset from the start
    // of the MDL range to the existing mapping.
    UCHAR *pRangeStart = (UCHAR*)pSourceMdl->StartVa + pSourceMdl->ByteOffset;
    *pUnmap = FALSE;
    return (UCHAR*)pSourceMdl->MappedSystemVa + ((UCHAR*)pBuffer - pRangeStart);
}
// Serializes the request in pIrp into the message that starts at pUserHeader.
// SRB requests become USER_HEADER + USER_SCSI_REQUEST_BLOCK (+ data-out
// bytes); all other requests are treated as device controls and become
// USER_HEADER (+ input buffer for the IOCTLs that carry one).
// The request id is taken from pStorageExtension->Id, stored in the IRP's
// DriverContext[0] and in the header, and the counter is then incremented.
// The message buffer is assumed to be large enough; no size is checked here.
void PackRequest(IRP *pIrp, USER_HEADER *pUserHeader, STORAGE_EXTENSION *pStorageExtension)
{
    IO_STACK_LOCATION *pStack = IoGetCurrentIrpStackLocation(pIrp);
    UCHAR *pPayload = (UCHAR*)pUserHeader + sizeof(USER_HEADER);

    // Tag both the IRP and the message with the same id, then advance it.
    pUserHeader->Id = pStorageExtension->Id;
    pIrp->Tail.Overlay.DriverContext[0] = (PVOID)pStorageExtension->Id;
    ++pStorageExtension->Id;

    pUserHeader->MajorFunction = pStack->MajorFunction;
    pUserHeader->MinorFunction = pStack->MinorFunction;
    pUserHeader->Status = 0;
    pUserHeader->Information = 0;

    if (pStack->MajorFunction != IRP_MJ_INTERNAL_DEVICE_CONTROL)
    {
        pUserHeader->IoControlCode = pStack->Parameters.DeviceIoControl.IoControlCode;
        pUserHeader->InputBufferLength = pStack->Parameters.DeviceIoControl.InputBufferLength;
        pUserHeader->OutputBufferLength = pStack->Parameters.DeviceIoControl.OutputBufferLength;
        pUserHeader->Length = sizeof(USER_HEADER);

        switch (pUserHeader->IoControlCode)
        {
        case IOCTL_STORAGE_QUERY_PROPERTY:
        case IOCTL_STORAGE_ENABLE_IDLE_POWER:
            // These carry an input buffer that travels right after the header.
            pUserHeader->Length += pUserHeader->InputBufferLength;
            memcpy(pPayload, pIrp->AssociatedIrp.SystemBuffer, pUserHeader->InputBufferLength);
            break;
        case IOCTL_STORAGE_POWER_ACTIVE:
        case IOCTL_SCSI_GET_ADDRESS:
            // Header only.
            break;
        default:
            // Any other control code is not expected here.
            __debugbreak();
            break;
        }
        return;
    }

    // SRB request: the device-control fields of the header are unused.
    pUserHeader->IoControlCode = 0;
    pUserHeader->InputBufferLength = 0;
    pUserHeader->OutputBufferLength = 0;
    pUserHeader->Length = sizeof(USER_HEADER) + sizeof(USER_SCSI_REQUEST_BLOCK);

    USER_SCSI_REQUEST_BLOCK *pWireSrb = (USER_SCSI_REQUEST_BLOCK*)pPayload;
    SCSI_REQUEST_BLOCK *pSourceSrb = pStack->Parameters.Scsi.Srb;

    pWireSrb->Function = pSourceSrb->Function;
    pWireSrb->SrbStatus = pSourceSrb->SrbStatus;
    pWireSrb->ScsiStatus = pSourceSrb->ScsiStatus;
    pWireSrb->PathId = pSourceSrb->PathId;
    pWireSrb->TargetId = pSourceSrb->TargetId;
    pWireSrb->Lun = pSourceSrb->Lun;
    pWireSrb->QueueTag = pSourceSrb->QueueTag;
    pWireSrb->QueueAction = pSourceSrb->QueueAction;
    pWireSrb->CdbLength = pSourceSrb->CdbLength;
    pWireSrb->SenseInfoBufferLength = pSourceSrb->SenseInfoBufferLength;
    pWireSrb->SrbFlags = pSourceSrb->SrbFlags;
    pWireSrb->DataTransferLength = pSourceSrb->DataTransferLength;
    pWireSrb->TimeOutValue = pSourceSrb->TimeOutValue;
    pWireSrb->QueueSortKey = pSourceSrb->QueueSortKey;
    memcpy(pWireSrb->Cdb, pSourceSrb->Cdb, sizeof(pSourceSrb->Cdb));

    // Data travelling to the device is appended right after the wire SRB.
    if ((pSourceSrb->DataTransferLength) && (pSourceSrb->SrbFlags & SRB_FLAGS_DATA_OUT))
    {
        BOOLEAN MustUnmap;
        PVOID pData = GetBuffer(pIrp->MdlAddress, pStorageExtension->pMdl, pSourceSrb->DataBuffer, pSourceSrb->DataTransferLength, &MustUnmap);
        memcpy((UCHAR*)pWireSrb + sizeof(USER_SCSI_REQUEST_BLOCK), pData, pSourceSrb->DataTransferLength);
        if (MustUnmap) MmUnmapLockedPages(pData, pStorageExtension->pMdl);
        pUserHeader->Length += pSourceSrb->DataTransferLength;
    }
}
// Applies the reply message at pUserHeader to the original request pIrp.
// SRB requests get their status fields, returned data or sense data copied
// back; device-control requests get their output copied into the system
// buffer.
//
// The reply comes from the network, so every length in it is clamped to the
// capacity of the local buffer it is copied into, and the request type is
// taken from the IRP itself rather than from the reply header.
// If the data buffer cannot be mapped, nothing is copied, the SRB is marked
// SRB_STATUS_INTERNAL_ERROR with a zero transfer length and
// pUserHeader->Status is set to STATUS_INSUFFICIENT_RESOURCES.
// NOTE(review): this assumes the caller reads pUserHeader->Status after this
// call when it completes the IRP - confirm.
// NOTE(review): the size of the received message itself is not known here, so
// the caller must ensure the reply really contains the bytes it announces.
void UnpackRequest(USER_HEADER *pUserHeader, IRP *pIrp, STORAGE_EXTENSION *pStorageExtension)
{
    BOOLEAN Unmap;
    PVOID pBuffer;
    ULONG DataCapacity;
    ULONG DataLength;
    UCHAR SenseCapacity;
    UCHAR SenseLength;
    ULONG OutputCapacity;
    ULONG OutputLength;
    IO_STACK_LOCATION *pStack;
    USER_SCSI_REQUEST_BLOCK *pUserSrb;
    SCSI_REQUEST_BLOCK *pSrb;

    pStack = IoGetCurrentIrpStackLocation(pIrp);
    // Decide from the IRP, not from the reply: Parameters.Scsi.Srb is only a
    // valid pointer when the request really is an SRB request.
    if (pStack->MajorFunction == IRP_MJ_INTERNAL_DEVICE_CONTROL)
    {
        pUserSrb = (USER_SCSI_REQUEST_BLOCK*)((UCHAR*)pUserHeader + sizeof(USER_HEADER));
        pSrb = pStack->Parameters.Scsi.Srb;

        // Capacities of the local buffers, read before they are overwritten.
        DataCapacity = pSrb->DataTransferLength;
        SenseCapacity = pSrb->SenseInfoBufferLength;
        DataLength = (pUserSrb->DataTransferLength < DataCapacity) ? pUserSrb->DataTransferLength : DataCapacity;
        SenseLength = (pUserSrb->SenseInfoBufferLength < SenseCapacity) ? pUserSrb->SenseInfoBufferLength : SenseCapacity;

        pSrb->SrbStatus = pUserSrb->SrbStatus;
        pSrb->ScsiStatus = pUserSrb->ScsiStatus;
        pSrb->SenseInfoBufferLength = SenseLength;
        pSrb->DataTransferLength = DataLength;

        if (NT_SUCCESS(pUserHeader->Status))
        {
            if ((pUserSrb->DataTransferLength) && (pUserSrb->SrbFlags & SRB_FLAGS_DATA_IN))
            {
                if (DataLength)
                {
                    pBuffer = GetBuffer(pIrp->MdlAddress, pStorageExtension->pMdl, pSrb->DataBuffer, DataLength, &Unmap);
                    if (pBuffer)
                    {
                        memcpy(pBuffer, (UCHAR*)pUserSrb + sizeof(USER_SCSI_REQUEST_BLOCK), DataLength);
                        if (Unmap) MmUnmapLockedPages(pBuffer, pStorageExtension->pMdl);
                    }
                    else
                    {
                        // Mapping failed: report it instead of writing through NULL.
                        pSrb->DataTransferLength = 0;
                        pSrb->SrbStatus = SRB_STATUS_INTERNAL_ERROR;
                        pUserHeader->Status = STATUS_INSUFFICIENT_RESOURCES;
                    }
                }
            }
            else
            {
                // A successful claim hands back the device object in DataBuffer.
                if (pUserSrb->Function == SRB_FUNCTION_CLAIM_DEVICE) pSrb->DataBuffer = pStack->DeviceObject;
            }
        }
        else
        {
            // On failure the payload after the wire SRB is the sense data.
            if ((SenseLength) && (pSrb->SenseInfoBuffer) && (pUserSrb->SrbStatus & SRB_STATUS_AUTOSENSE_VALID))
            {
                memcpy(pSrb->SenseInfoBuffer, (UCHAR*)pUserSrb + sizeof(USER_SCSI_REQUEST_BLOCK), SenseLength);
            }
        }
    }
    else
    {
        if (NT_SUCCESS(pUserHeader->Status))
        {
            if ((pUserHeader->IoControlCode == IOCTL_SCSI_GET_ADDRESS) ||
                (pUserHeader->IoControlCode == IOCTL_STORAGE_QUERY_PROPERTY))
            {
                // Never copy more than the requester's output buffer can hold.
                OutputCapacity = pStack->Parameters.DeviceIoControl.OutputBufferLength;
                OutputLength = (pUserHeader->Information < OutputCapacity) ? pUserHeader->Information : OutputCapacity;
                if ((OutputLength) && (pIrp->AssociatedIrp.SystemBuffer))
                {
                    memcpy(pIrp->AssociatedIrp.SystemBuffer, (UCHAR*)pUserHeader + sizeof(USER_HEADER), OutputLength);
                }
            }
        }
    }
}
Server side code to allocate request and IO completion routine:
// Builds the IRP (*ppIrp2) that replays, on the lower device, the request
// described by the message held in pIrp's MDL.
//
// For SRB requests the buffer is laid out as
//   USER_HEADER | USER_SCSI_REQUEST_BLOCK | data | sense | SCSI_REQUEST_BLOCK
// and the SRB, its data buffer and its sense buffer all point into that same
// buffer. For device controls the bytes after the header serve as the
// system buffer.
//
// The message comes from the network, so all lengths are checked against the
// MDL's byte count before anything is placed in the buffer.
//
// Returns STATUS_SUCCESS with *ppIrp2 set, STATUS_INSUFFICIENT_RESOURCES when
// the buffer cannot be mapped or no IRP can be allocated, and
// STATUS_INVALID_PARAMETER when the message does not fit the buffer.
// *ppIrp2 is left untouched on failure.
NTSTATUS AllocateRequest(DEVICE_EXTENSION *pDeviceExtension, IRP *pIrp, IRP **ppIrp2)
{
    IRP *pIrp2;
    PVOID pBuffer;
    ULONG BufferLength;
    ULONG PayloadCapacity;
    ULONGLONG RequiredLength;
    IO_STACK_LOCATION *pStack;
    SCSI_REQUEST_BLOCK *pSrb;
    USER_SCSI_REQUEST_BLOCK *pUserSrb;
    DEVICE_OBJECT *pDeviceObject;
    USER_HEADER *pUserHeader;

    pUserHeader = (USER_HEADER*)MmGetSystemAddressForMdlSafe(pIrp->MdlAddress, NormalPagePriority);
    if (!pUserHeader) return STATUS_INSUFFICIENT_RESOURCES;

    // Validate every length before touching anything beyond the header.
    BufferLength = MmGetMdlByteCount(pIrp->MdlAddress);
    if (BufferLength < sizeof(USER_HEADER)) return STATUS_INVALID_PARAMETER;
    PayloadCapacity = BufferLength - (ULONG)sizeof(USER_HEADER);
    pUserSrb = (USER_SCSI_REQUEST_BLOCK*)((UCHAR*)pUserHeader + sizeof(USER_HEADER));
    if (pUserHeader->MajorFunction == IRP_MJ_INTERNAL_DEVICE_CONTROL)
    {
        if (PayloadCapacity < sizeof(USER_SCSI_REQUEST_BLOCK)) return STATUS_INVALID_PARAMETER;
        // 64-bit sum so a huge DataTransferLength cannot wrap around.
        RequiredLength = (ULONGLONG)sizeof(USER_HEADER) + sizeof(USER_SCSI_REQUEST_BLOCK);
        RequiredLength += pUserSrb->DataTransferLength;
        RequiredLength += pUserSrb->SenseInfoBufferLength;
        RequiredLength += sizeof(SCSI_REQUEST_BLOCK);
        if (RequiredLength > BufferLength) return STATUS_INVALID_PARAMETER;
    }
    else if (pUserHeader->IoControlCode == IOCTL_SCSI_GET_ADDRESS)
    {
        if (pUserHeader->OutputBufferLength > PayloadCapacity) return STATUS_INVALID_PARAMETER;
    }
    else if (pUserHeader->IoControlCode == IOCTL_STORAGE_QUERY_PROPERTY)
    {
        if ((pUserHeader->InputBufferLength > PayloadCapacity) ||
            (pUserHeader->OutputBufferLength > PayloadCapacity)) return STATUS_INVALID_PARAMETER;
    }
    else if (pUserHeader->IoControlCode == IOCTL_STORAGE_ENABLE_IDLE_POWER)
    {
        if (pUserHeader->InputBufferLength > PayloadCapacity) return STATUS_INVALID_PARAMETER;
    }

    pDeviceObject = pDeviceExtension->pLowerDeviceObject;
    pIrp2 = IoAllocateIrp(pDeviceObject->StackSize, FALSE);
    if (!pIrp2) return STATUS_INSUFFICIENT_RESOURCES;

    pStack = IoGetNextIrpStackLocation(pIrp2);
    pStack->DeviceObject = pDeviceObject;
    pIrp2->Tail.Overlay.Thread = PsGetCurrentThread();
    pStack->MajorFunction = pUserHeader->MajorFunction;
    pStack->MinorFunction = pUserHeader->MinorFunction;
    if (pUserHeader->MajorFunction == IRP_MJ_INTERNAL_DEVICE_CONTROL)
    {
        // The SRB lives behind the data and sense areas of the same buffer.
        pSrb = (SCSI_REQUEST_BLOCK*)((UCHAR*)pUserSrb + (sizeof(USER_SCSI_REQUEST_BLOCK) + pUserSrb->DataTransferLength + pUserSrb->SenseInfoBufferLength));
        // Start from a clean SRB so fields not set below are not left as
        // whatever bytes the buffer happened to contain.
        RtlZeroMemory(pSrb, sizeof(SCSI_REQUEST_BLOCK));
        pSrb->Length = sizeof(SCSI_REQUEST_BLOCK);
        pSrb->Function = pUserSrb->Function;
        pSrb->SrbStatus = pUserSrb->SrbStatus;
        pSrb->ScsiStatus = pUserSrb->ScsiStatus;
        pSrb->PathId = pUserSrb->PathId;
        pSrb->TargetId = pUserSrb->TargetId;
        pSrb->Lun = pUserSrb->Lun;
        pSrb->QueueTag = pUserSrb->QueueTag;
        pSrb->QueueAction = pUserSrb->QueueAction;
        pSrb->CdbLength = pUserSrb->CdbLength;
        pSrb->SenseInfoBufferLength = pUserSrb->SenseInfoBufferLength;
        pSrb->SrbFlags = pUserSrb->SrbFlags;
        pSrb->DataTransferLength = pUserSrb->DataTransferLength;
        pSrb->TimeOutValue = pUserSrb->TimeOutValue;
        if (pUserSrb->DataTransferLength)
        {
            // DataBuffer is expressed in the source MDL's own virtual address
            // range, which is what IoBuildPartialMdl expects.
            pSrb->DataBuffer = (UCHAR*)pIrp->MdlAddress->StartVa + pIrp->MdlAddress->ByteOffset + (sizeof(USER_HEADER) + sizeof(USER_SCSI_REQUEST_BLOCK));
            IoBuildPartialMdl(pIrp->MdlAddress, pDeviceExtension->pMdl, pSrb->DataBuffer, pUserSrb->DataTransferLength);
            pIrp2->MdlAddress = pDeviceExtension->pMdl;
        }
        else pSrb->DataBuffer = NULL;
        if (pUserSrb->SenseInfoBufferLength)
        {
            // Sense data is written right behind the data area.
            pSrb->SenseInfoBuffer = (UCHAR*)pUserSrb + (sizeof(USER_SCSI_REQUEST_BLOCK) + pUserSrb->DataTransferLength);
        }
        else pSrb->SenseInfoBuffer = NULL;
        pSrb->NextSrb = NULL;
        pSrb->OriginalRequest = pIrp2;
        pSrb->SrbExtension = NULL;
        pSrb->QueueSortKey = pUserSrb->QueueSortKey;
        memcpy(pSrb->Cdb, pUserSrb->Cdb, sizeof(pSrb->Cdb));
        pStack->Parameters.Scsi.Srb = pSrb;
    }
    else
    {
        pStack->Parameters.DeviceIoControl.IoControlCode = pUserHeader->IoControlCode;
        // Buffered IOCTLs use the bytes after the header for input and output.
        pBuffer = (UCHAR*)pUserHeader + sizeof(USER_HEADER);
        if (pUserHeader->IoControlCode == IOCTL_SCSI_GET_ADDRESS)
        {
            pStack->Parameters.DeviceIoControl.OutputBufferLength = pUserHeader->OutputBufferLength;
            pIrp2->AssociatedIrp.SystemBuffer = pBuffer;
        }
        else if (pUserHeader->IoControlCode == IOCTL_STORAGE_QUERY_PROPERTY)
        {
            pStack->Parameters.DeviceIoControl.InputBufferLength = pUserHeader->InputBufferLength;
            pStack->Parameters.DeviceIoControl.OutputBufferLength = pUserHeader->OutputBufferLength;
            pIrp2->AssociatedIrp.SystemBuffer = pBuffer;
        }
        else if (pUserHeader->IoControlCode == IOCTL_STORAGE_ENABLE_IDLE_POWER)
        {
            pStack->Parameters.DeviceIoControl.InputBufferLength = pUserHeader->InputBufferLength;
            pIrp2->AssociatedIrp.SystemBuffer = pBuffer;
        }
    }
    *ppIrp2 = pIrp2;
    Status = STATUS_SUCCESS;
    return Status;
}
// Completion routine for the IRP (pIrp2) that was sent to the lower device.
// Writes the result back into the message buffer of the original request
// (taken from DriverContext[1]), sets the reply length, frees pIrp2,
// completes the original request and releases the remove lock.
// Always returns STATUS_MORE_PROCESSING_REQUIRED because pIrp2 is freed here.
//
// Reply layout for SRB requests: USER_HEADER | USER_SCSI_REQUEST_BLOCK |
// data (on success, data-in) or sense data (on failure with valid autosense).
NTSTATUS IoCompletionRoutine3(DEVICE_OBJECT *pDeviceObject, IRP *pIrp2, void *pContext)
{
    IRP *pIrp;
    ULONG RequestDataLength;
    UCHAR *pPayload;
    USER_HEADER *pUserHeader;
    DEVICE_EXTENSION *pDeviceExtension;
    USER_SCSI_REQUEST_BLOCK *pUserSrb;
    SCSI_REQUEST_BLOCK *pSrb;

    pDeviceExtension = (DEVICE_EXTENSION*)pIrp2->Tail.Overlay.DriverContext[0];
    pIrp = (IRP*)pIrp2->Tail.Overlay.DriverContext[1];
    pUserHeader = (USER_HEADER*)MmGetSystemAddressForMdlSafe(pIrp->MdlAddress, NormalPagePriority);
    pUserHeader->Status = pIrp2->IoStatus.Status;
    // The wire field is 32 bits wide; the pointer-sized value is narrowed.
    pUserHeader->Information = (ULONG)pIrp2->IoStatus.Information;
    if (pUserHeader->MajorFunction == IRP_MJ_INTERNAL_DEVICE_CONTROL)
    {
        pUserSrb = (USER_SCSI_REQUEST_BLOCK*)((UCHAR*)pUserHeader + sizeof(USER_HEADER));
        pPayload = (UCHAR*)pUserSrb + sizeof(USER_SCSI_REQUEST_BLOCK);

        // Remember the length the request was laid out with. The sense area
        // and the SRB were placed relative to THIS value; the lower driver
        // may report a different (often zero) DataTransferLength on failure.
        RequestDataLength = pUserSrb->DataTransferLength;
        pSrb = (SCSI_REQUEST_BLOCK*)(pPayload + RequestDataLength + pUserSrb->SenseInfoBufferLength);

        pUserSrb->SrbStatus = pSrb->SrbStatus;
        pUserSrb->ScsiStatus = pSrb->ScsiStatus;
        pUserSrb->SenseInfoBufferLength = pSrb->SenseInfoBufferLength;
        pUserSrb->DataTransferLength = pSrb->DataTransferLength;
        pUserHeader->Length = sizeof(USER_HEADER) + sizeof(USER_SCSI_REQUEST_BLOCK);
        if (NT_SUCCESS(pUserHeader->Status))
        {
            if ((pSrb->DataTransferLength) && (pSrb->SrbFlags & SRB_FLAGS_DATA_IN))
            {
                pUserHeader->Length += pUserSrb->DataTransferLength;
            }
        }
        else
        {
            if ((pSrb->SenseInfoBufferLength) && (pSrb->SrbStatus & SRB_STATUS_AUTOSENSE_VALID))
            {
                pUserHeader->Length += pUserSrb->SenseInfoBufferLength;
                // The client expects the sense bytes directly behind the wire
                // SRB. They were written behind the data area, so move them
                // forward by the ORIGINAL data length, whatever transfer
                // length the lower driver reported.
                if (RequestDataLength)
                {
                    memmove(pPayload, pPayload + RequestDataLength, pUserSrb->SenseInfoBufferLength);
                }
            }
        }
    }
    else
    {
        pUserHeader->Length = sizeof(USER_HEADER);
        // Only these two IOCTLs return output bytes to the client.
        if ((pUserHeader->IoControlCode == IOCTL_SCSI_GET_ADDRESS) ||
            (pUserHeader->IoControlCode == IOCTL_STORAGE_QUERY_PROPERTY))
        {
            pUserHeader->Length += pUserHeader->Information;
        }
    }
    IoFreeIrp(pIrp2);
    CompleteRequest(pIrp, STATUS_SUCCESS, 0);
    IoReleaseRemoveLock(&pDeviceExtension->RemoveLock, pIrp);
    return STATUS_MORE_PROCESSING_REQUIRED;
}
Everything runs fine (requests are passed between server and client, no BSOD, etc), but cdrom device just does not show up on the client side. I thought it might be something with srb data buffer access. Can you help me to figure it out? Thank you.
Is there a way to obtain the GLXContext associated with a window, without calling glXGetCurrentContext()?
Cast the SDL_GLContext you get from SDL_GL_CreateContext() to GLXContext:
/* Creates a GLX context for `window`, makes it current and returns it as an
 * SDL_GLContext (the returned value is the GLXContext itself).
 *
 * A legacy context is created when no version >= 3, profile or flags were
 * requested; otherwise glXCreateContextAttribsARB is used with a framebuffer
 * config chosen by glXChooseFBConfig.
 *
 * Returns NULL on failure; an SDL error is set unless the X error handler
 * already recorded one (errorCode != Success).
 */
SDL_GLContext
X11_GL_CreateContext(_THIS, SDL_Window * window)
{
    SDL_WindowData *data = (SDL_WindowData *) window->driverdata;
    Display *display = data->videodata->display;
    int screen =
        ((SDL_DisplayData *) SDL_GetDisplayForWindow(window)->driverdata)->screen;
    XWindowAttributes xattr;
    XVisualInfo v, *vinfo;
    int n;
    GLXContext context = NULL, share_context;

    if (_this->gl_config.share_with_current_context) {
        share_context = (GLXContext)SDL_GL_GetCurrentContext();
    } else {
        share_context = NULL;
    }

    /* We do this to create a clean separation between X and GLX errors. */
    X11_XSync(display, False);
    errorHandlerOperation = "create GL context";
    errorBase = _this->gl_data->errorBase;
    errorCode = Success;
    handler = X11_XSetErrorHandler(X11_GL_ErrorHandler);
    X11_XGetWindowAttributes(display, data->xwindow, &xattr);
    v.screen = screen;
    v.visualid = X11_XVisualIDFromVisual(xattr.visual);
    vinfo = X11_XGetVisualInfo(display, VisualScreenMask | VisualIDMask, &v, &n);
    if (vinfo) {
        if (_this->gl_config.major_version < 3 &&
            _this->gl_config.profile_mask == 0 &&
            _this->gl_config.flags == 0) {
            /* Create legacy context */
            context =
                _this->gl_data->glXCreateContext(display, vinfo, share_context, True);
        } else {
            /* max 10 attributes plus terminator */
            int attribs[11] = {
                GLX_CONTEXT_MAJOR_VERSION_ARB,
                _this->gl_config.major_version,
                GLX_CONTEXT_MINOR_VERSION_ARB,
                _this->gl_config.minor_version,
                0
            };
            int iattr = 4;

            /* SDL profile bits match GLX profile bits */
            if( _this->gl_config.profile_mask != 0 ) {
                attribs[iattr++] = GLX_CONTEXT_PROFILE_MASK_ARB;
                attribs[iattr++] = _this->gl_config.profile_mask;
            }

            /* SDL flags match GLX flags */
            if( _this->gl_config.flags != 0 ) {
                attribs[iattr++] = GLX_CONTEXT_FLAGS_ARB;
                attribs[iattr++] = _this->gl_config.flags;
            }

            /* only set if glx extension is available */
            if( _this->gl_data->HAS_GLX_ARB_context_flush_control ) {
                attribs[iattr++] = GLX_CONTEXT_RELEASE_BEHAVIOR_ARB;
                attribs[iattr++] =
                    _this->gl_config.release_behavior ?
                    GLX_CONTEXT_RELEASE_BEHAVIOR_FLUSH_ARB :
                    GLX_CONTEXT_RELEASE_BEHAVIOR_NONE_ARB;
            }

            attribs[iattr++] = 0;

            /* Get a pointer to the context creation function for GL 3.0 */
            if (!_this->gl_data->glXCreateContextAttribsARB) {
                SDL_SetError("OpenGL 3.0 and later are not supported by this system");
            } else {
                int glxAttribs[64];

                /* Create a GL 3.x context */
                GLXFBConfig *framebuffer_config = NULL;
                int fbcount = 0;

                X11_GL_GetAttributes(_this,display,screen,glxAttribs,64,SDL_TRUE);

                if (!_this->gl_data->glXChooseFBConfig
                    || !(framebuffer_config =
                        _this->gl_data->glXChooseFBConfig(display,
                                          DefaultScreen(display), glxAttribs,
                                          &fbcount))) {
                    SDL_SetError("No good framebuffers found. OpenGL 3.0 and later unavailable");
                } else {
                    context = _this->gl_data->glXCreateContextAttribsARB(display,
                                                    framebuffer_config[0],
                                                    share_context, True, attribs);
                    /* The config list is allocated by glXChooseFBConfig and
                       is only needed for the call above; release it so it is
                       not leaked on every context creation. */
                    X11_XFree(framebuffer_config);
                }
            }
        }
        X11_XFree(vinfo);
    }

    /* Flush pending errors, then restore the previous X error handler. */
    X11_XSync(display, False);
    X11_XSetErrorHandler(handler);

    if (!context) {
        if (errorCode == Success) {
            SDL_SetError("Could not create GL context");
        }
        return NULL;
    }

    if (X11_GL_MakeCurrent(_this, window, context) < 0) {
        X11_GL_DeleteContext(_this, context);
        return NULL;
    }

    return context;
}
SDL2 does not otherwise store the context anywhere.
So I have this problem I have been stuck on for a few weeks now: the instance buffer is not working in my DX 11_0 application. The vertex buffer and index buffer are working just fine, but for some reason nothing is getting passed into the instance buffer, even though the instance buffer was created with S_OK and throws no error.
Here is the definition and creation of the instance buffer
instanceDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
instanceDesc.ByteWidth = sizeof(InstanceVertex2) * MAX_INSTANCES;
instanceDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
instanceDesc.MiscFlags = 0;
instanceDesc.StructureByteStride = 0;
instanceDesc.Usage = D3D11_USAGE_DYNAMIC;
instanceData.pSysMem = new InstanceVertex2[MAX_INSTANCES];
instanceData.SysMemPitch = 0;
instanceData.SysMemSlicePitch = 0;
//create the instance buffer
result = device->CreateBuffer(&instanceDesc, &instanceData, &m_instanceBuffer);
if (FAILED(result))
{
return false;
}
Here is the polygon layout
//vertex position, by vertex
polygonLayout[0].AlignedByteOffset = 0;
polygonLayout[0].Format = DXGI_FORMAT_R32G32B32_FLOAT;
polygonLayout[0].InputSlot = 0;
polygonLayout[0].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
polygonLayout[0].InstanceDataStepRate = 0;
polygonLayout[0].SemanticIndex = 0;
polygonLayout[0].SemanticName = "POSITION";
//uv coords, by vertex
polygonLayout[1].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygonLayout[1].Format = DXGI_FORMAT_R32G32_FLOAT;
polygonLayout[1].InputSlot = 0;
polygonLayout[1].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
polygonLayout[1].InstanceDataStepRate = 0;
polygonLayout[1].SemanticIndex = 0;
polygonLayout[1].SemanticName = "TEXCOORD";
//texture ID, by instance
polygonLayout[2].AlignedByteOffset = 0;
polygonLayout[2].Format = DXGI_FORMAT_R32_SINT;
polygonLayout[2].InputSlot = 1;
polygonLayout[2].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA;
polygonLayout[2].InstanceDataStepRate = 1;
polygonLayout[2].SemanticIndex = 0;
polygonLayout[2].SemanticName = "TEXTUREID";
//color, by instance
polygonLayout[3].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygonLayout[3].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
polygonLayout[3].InputSlot = 1;
polygonLayout[3].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA;
polygonLayout[3].InstanceDataStepRate = 1;
polygonLayout[3].SemanticIndex = 0;
polygonLayout[3].SemanticName = "COLOR";
//UVAdd , by instance
polygonLayout[4].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygonLayout[4].Format = DXGI_FORMAT_R32G32_FLOAT;
polygonLayout[4].InputSlot = 1;
polygonLayout[4].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA;
polygonLayout[4].InstanceDataStepRate = 1;
polygonLayout[4].SemanticIndex = 0;
polygonLayout[4].SemanticName = "UVADD";
//UVMultiply, by instance
polygonLayout[5].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygonLayout[5].Format = DXGI_FORMAT_R32G32_FLOAT;
polygonLayout[5].InputSlot = 1;
polygonLayout[5].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA;
polygonLayout[5].InstanceDataStepRate = 1;
polygonLayout[5].SemanticIndex = 0;
polygonLayout[5].SemanticName = "UVMULTIPLY";
//matrix row 1
polygonLayout[6].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygonLayout[6].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
polygonLayout[6].InputSlot = 1;
polygonLayout[6].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA;
polygonLayout[6].InstanceDataStepRate = 1;
polygonLayout[6].SemanticIndex = 0;
polygonLayout[6].SemanticName = "MATRIX";
//matrix row 2
polygonLayout[7].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygonLayout[7].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
polygonLayout[7].InputSlot = 1;
polygonLayout[7].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA;
polygonLayout[7].InstanceDataStepRate = 1;
polygonLayout[7].SemanticIndex = 1;
polygonLayout[7].SemanticName = "MATRIX";
//matrix row 3
polygonLayout[8].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygonLayout[8].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
polygonLayout[8].InputSlot = 1;
polygonLayout[8].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA;
polygonLayout[8].InstanceDataStepRate = 1;
polygonLayout[8].SemanticIndex = 2;
polygonLayout[8].SemanticName = "MATRIX";
//matrix row 4
polygonLayout[9].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygonLayout[9].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
polygonLayout[9].InputSlot = 1;
polygonLayout[9].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA;
polygonLayout[9].InstanceDataStepRate = 1;
polygonLayout[9].SemanticIndex = 3;
polygonLayout[9].SemanticName = "MATRIX";
numElements = sizeof(polygonLayout) / sizeof(polygonLayout[0]);
//create the input layout
result = device->CreateInputLayout(polygonLayout, numElements, vertexShaderBuffer->GetBufferPointer(), vertexShaderBuffer->GetBufferSize(), &m_layout);
if (FAILED(result))
{
MessageBox(hwnd, TEXT("Failed to create the input layout"), TEXT("Error initializaing shader"), MB_OK);
return false;
}
Here is me actually updating the instance buffer(it is dynamic)
// Map the dynamic instance buffer for writing; WRITE_DISCARD hands back a
// fresh region, so the whole contents must be rewritten.
result = deviceContext->Map(m_instanceBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &data);
if (FAILED(result))
{
return false;
}
instancesPtr = (InstanceVertex2*)data.pData;
// BUG: sizeof(&m_vertices[0]) is the size of a pointer, not of the instance
// data, so only a pointer's worth of bytes is copied into the buffer.
memcpy(instancesPtr, (void*)instances, sizeof(&m_vertices[0]));
//now un map
deviceContext->Unmap(m_instanceBuffer, 0);
And finally this is the code where I put the buffers into the device context
//set the buffers
// Slot 0: per-vertex stream
buffers[0] = m_vertexBuffer;
strides[0] = sizeof(InstanceVertex1);
offsets[0] = 0;

// Slot 1: per-instance stream
buffers[1] = m_instanceBuffer;
strides[1] = sizeof(InstanceVertex2);
offsets[1] = 0;

// Bind both streams starting at slot 0, then the 32-bit index buffer and the
// triangle-list topology.
deviceContext->IASetVertexBuffers(0, 2, buffers, strides, offsets);
deviceContext->IASetIndexBuffer(m_indexBuffer, DXGI_FORMAT_R32_UINT, 0);
deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
This does seem like a lot of code to look through, but I don't know exactly what is going wrong. I have got instancing and dynamic vertex buffers to work in the past without problems, and for some reason I cannot get it to work when I combine them. I have NVIDIA Nsight, so I can look in the vertex shader and buffers directly, and from what I am looking at I can tell that nothing is getting passed into the instance buffer. I would appreciate any help or pointers anyone is willing to give me so that I can fix my problem.
I figured it out, it turned out the problem lay not in the initialization of the instance buffer but in the way I was updating it, I will include the answer to it so that others might be able to get help with a similar problem.
I changed :
// Old version, kept to show the defect.
result = deviceContext->Map(m_instanceBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &data);
if (FAILED(result))
{
return false;
}
instancesPtr = (InstanceVertex2*)data.pData;
// BUG: sizeof(&m_vertices[0]) is the size of a pointer, so only a pointer's
// worth of bytes reaches the mapped buffer.
memcpy(instancesPtr, (void*)instances, sizeof(&m_vertices[0]));
//now un map
deviceContext->Unmap(m_instanceBuffer, 0);
To :
result = deviceContext->Map(m_instanceBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &data);
if (FAILED(result))
{
return false;
}
instancesPtr = (InstanceVertex2*)data.pData;
for (int i = 0;i < m_vertices.size();i++)
{
instancesPtr[i].color = m_vertices[i].color;
instancesPtr[i].matrixInstance = m_vertices[i].matrixInstance;
instancesPtr[i].textureID = m_vertices[i].textureID;
instancesPtr[i].UVAdd = m_vertices[i].UVAdd;
instancesPtr[i].UVMultiply = m_vertices[i].UVMultiply;
}
//memcpy(instancesPtr, (void*)instances, sizeof(&m_vertices[0]));
//now un map
deviceContext->Unmap(m_instanceBuffer, 0);
I hope this helps someone else with their problem someday; it certainly took me long enough.