Related
The docs suggest that default-usage textures can be mapped on UMA architectures, such as Intel integrated graphics, with Direct3D 11.3.
I tried to achieve this, but Map() always fails with E_INVALIDARG.
I am quite new to C++ and DirectX, but below is what I believe to be a minimal test case. Please don't hesitate to point out any stupidity I am committing.
I am running this on a notebook with Windows 10 1809, Intel Skylake i5-6300U with HD Graphics 520.
#include "pch.h"
#include <iostream>
#include <dxgi1_6.h>
#include <d3d.h>
#include <d3d11_4.h>
#include <assert.h>
int main()
{
HRESULT res = S_OK;
ID3D11Device *Dev = nullptr;
ID3D11DeviceContext *Ctx = nullptr;
D3D_FEATURE_LEVEL Fl;
D3D_FEATURE_LEVEL fls[1] = { D3D_FEATURE_LEVEL_11_1 };
res = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, D3D11_CREATE_DEVICE_DEBUG | D3D11_CREATE_DEVICE_BGRA_SUPPORT, fls, 1, D3D11_SDK_VERSION, &Dev, &Fl, &Ctx);
assert(res == S_OK);
assert(Fl == D3D_FEATURE_LEVEL_11_1);
ID3D11Device5 *Dev5 = nullptr;
res = Dev->QueryInterface<ID3D11Device5>(&Dev5);
assert(res == S_OK);
Dev->Release();
Dev = nullptr;
ID3D11DeviceContext4 *Ctx4;
res = Ctx->QueryInterface<ID3D11DeviceContext4>(&Ctx4);
assert(res == S_OK);
Ctx->Release();
Ctx = nullptr;
D3D11_FEATURE_DATA_D3D11_OPTIONS2 opts2;
res = Dev5->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2, &opts2, sizeof(opts2));
assert(res == S_OK);
assert(opts2.MapOnDefaultTextures);
assert(opts2.UnifiedMemoryArchitecture);
D3D11_TEXTURE2D_DESC1 texDesc = { 0 };
texDesc.ArraySize = 1;
texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
texDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
texDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
texDesc.Height = 256;
texDesc.Width = 256;
texDesc.MipLevels = 1;
texDesc.MiscFlags = 0;
texDesc.SampleDesc.Count = 1;
texDesc.SampleDesc.Quality = 0;
texDesc.TextureLayout = D3D11_TEXTURE_LAYOUT_UNDEFINED;
texDesc.Usage = D3D11_USAGE_DEFAULT;
byte mem[256 * 256 * 4];
ZeroMemory(mem, 256 * 256 * 4);
D3D11_SUBRESOURCE_DATA data = { 0 };
data.pSysMem = mem;
data.SysMemPitch = 256 * 4;
ID3D11Texture2D1 *tex2d;
res = Dev5->CreateTexture2D1(&texDesc, &data, &tex2d);
assert(res == S_OK);
D3D11_MAPPED_SUBRESOURCE map = { 0 };
// I believe at least one of these should succeed, but all fail
res = Ctx4->Map(tex2d, 0, D3D11_MAP_READ, 0, &map);
//res = Ctx4->Map(tex2d, 0, D3D11_MAP_WRITE, 0, &map);
//res = Ctx4->Map(tex2d, 0, D3D11_MAP_READ_WRITE, 0, &map);
assert(res == S_OK); // E_INVALIDARG
}
I believe the Map() call should succeed, but it fails with E_INVALIDARG.
EDIT: I tried D3D11_TEXTURE_LAYOUT_ROW_MAJOR and D3D11_TEXTURE_LAYOUT_64K_STANDARD_SWIZZLE too, but then CreateTexture2D1() fails with E_INVALIDARG. Maybe my hardware doesn't support those modes?
I think the issue is described in the documentation:
It is illegal to set CPU access flags on default textures without also setting TextureLayout to a value other than D3D11_TEXTURE_LAYOUT_UNDEFINED.
My project uses DirectX 10 and some of its boilerplate to render a scene, however, it crashes with an error message "Could not initialize the model object." As far as I understand, making it up to this point means that, at the very least, the model has been successfully created, so the error must be in one of the files below, which is fortunate as the most difficult tasks are handled by the FallBodyClass.cpp that hosts OpenCL API interactions. If needed, I can try attaching parts of it in a later edit.
While debugging, my IDE shows all members of m_Model (m_vertexBuffer, m_indexBuffer, etc.) with a _vfptr entry. I do not know what to make of it, but it does seem to confirm that modelclass.cpp is the point of failure.
graphicsclass.cpp
// Default constructor: zeroes the owned object handles and takes the body
// count from the BODIES constant.
GraphicsClass::GraphicsClass()
{
    m_bodies = BODIES;
    m_ColorShader = 0;
    m_Model = 0;
    m_Direct3D = 0;
}
// Copy constructor with an empty body: nothing is copied from `other`.
GraphicsClass::GraphicsClass(const GraphicsClass& other)
{
}
// Destructor with an empty body: no cleanup is performed here.
GraphicsClass::~GraphicsClass()
{
}
// Creates and initializes the Direct3D wrapper and the model object.
// Parameters:
//   screenWidth, screenHeight - forwarded to D3DClass::Initialize.
//   hwnd - forwarded to D3DClass::Initialize and used as the owner window of
//          the error message boxes.
// Returns false as soon as one of the steps below fails.
// NOTE: this excerpt ends before the function's closing brace.
bool GraphicsClass::Initialize(int screenWidth, int screenHeight, HWND hwnd)
{
bool result;
// Create the Direct3D object.
m_Direct3D = new D3DClass;
// NOTE(review): a plain `new` expression reports failure by throwing, so
// this null check is only meaningful with a non-throwing allocator.
if (!m_Direct3D)
{
return false;
}
// Initialize the Direct3D object.
result = m_Direct3D->Initialize(screenWidth, screenHeight, VSYNC_ENABLED, hwnd, FULL_SCREEN, SCREEN_DEPTH, SCREEN_NEAR);
if (!result)
{
MessageBox(hwnd, L"Could not initialize Direct3D", L"Error", MB_OK);
return false;
}
// Create the model object.
m_Model = new ModelClass(m_bodies);
if (!m_Model)
{
return false;
}
// Initialize the model object.
// This is the call whose failure produces the reported error message.
result = m_Model->Initialize(m_Direct3D->GetDevice());
if (!result)
{
MessageBox(hwnd, L"Could not initialize the model object.", L"Error", MB_OK);
return false;
}
modelclass.cpp
// Stores the requested body count and zeroes every buffer/array handle;
// the real resources are set up later by Initialize().
ModelClass::ModelClass(int bodies)
{
    m_bodies = bodies;
    m_bodySystem = 0;
    m_velocities = 0;
    m_positions = 0;
    m_indexBuffer = 0;
    m_vertexBuffer = 0;
}
// Copy constructor with an empty body: nothing is copied from `other`.
ModelClass::ModelClass(const ModelClass& other)
{
}
// Destructor with an empty body: no cleanup is performed here.
ModelClass::~ModelClass()
{
}
// Allocates the host-side position/velocity arrays, creates the OpenCL body
// system and then builds the vertex/index buffers.
// Parameters:
//   device - Direct3D 10 device forwarded to InitializeBuffers().
// Returns true only when both the body system and the buffers were set up.
bool ModelClass::Initialize(ID3D10Device* device)
{
    // Must start out as true: the FallBodyClass constructor reports failure
    // by writing `false` through this reference and does not write anything
    // on success, so an uninitialized flag would be read below.
    bool result = true;

    TwoLines twoLinesConstants = CalculateLinesConstants(M_PI_4);

    m_positions = new float[COORD_DIM * m_bodies];
    m_velocities = new float[VEL_DIM * m_bodies];

    m_bodySystem = new class FallBodyClass(m_bodies, &m_positions, &m_velocities, twoLinesConstants, result);
    if (!result)
    {
        return false;
    }

    // Initialize the vertex and index buffer that hold the geometry.
    result = InitializeBuffers(device, twoLinesConstants);
    if (!result)
    {
        return false;
    }

    return true;
}
FallBodyclass.cpp
// Sets up the OpenCL side of the simulation: picks a GPU device, creates the
// context/queue, builds "kernel.cl", creates the host-visible and device
// buffers and uploads the initial state.
// Parameters:
//   bodies        - number of simulated bodies.
//   positionsCPU  - address of the caller's position array (COORD_DIM floats
//                   per body). When *positionsCPU is non-null the initial
//                   positions are copied into it; the pointer itself is left
//                   untouched so the caller keeps ownership of its array.
//   velocitiesCPU - same contract for velocities (VEL_DIM floats per body).
//   twoLines      - line constants uploaded to the device.
//   success       - set to true when every step succeeded, false otherwise.
FallBodyClass::FallBodyClass(int bodies, float ** positionsCPU, float ** velocitiesCPU, TwoLines twoLines, bool & success)
    :bodies(bodies)
{
    cl_int ret = CL_SUCCESS;
    const size_t positionsBytes = COORD_DIM * bodies * sizeof(float);
    const size_t velocitiesBytes = VEL_DIM * bodies * sizeof(float);
    const size_t linesBytes = 8 * sizeof(float);

    // Pessimistic default; flipped to true only after the last step.
    success = false;

    // Start from null handles so a partially constructed object never holds
    // indeterminate values.
    device = NULL;
    context = NULL;
    queue = NULL;
    program = NULL;
    kernel = NULL;
    positionsCPUBuffer = NULL;
    velocitiesCPUBuffer = NULL;
    linesCPUBuffer = NULL;
    linesGPU = NULL;
    for (int i = 0; i < 2; ++i)
    {
        positionsGPU[i] = NULL;
        velocitiesGPU[i] = NULL;
    }

    // Look at up to two platforms and take the first one that has a GPU.
    cl_platform_id clPlatformID[2];
    cl_uint num_platforms = 0;
    ret = clGetPlatformIDs(2, clPlatformID, &num_platforms);
    if (ret != CL_SUCCESS || num_platforms == 0)
    {
        return;
    }
    if (num_platforms > 2)
    {
        num_platforms = 2; // only two slots were provided
    }
    ret = CL_DEVICE_NOT_FOUND;
    for (cl_uint i = 0; i < num_platforms && ret != CL_SUCCESS; ++i)
    {
        ret = clGetDeviceIDs(clPlatformID[i], CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    }
    if (ret != CL_SUCCESS)
    {
        return;
    }

    // creating the context
    context = clCreateContext(0, 1, &device, NULL, NULL, &ret);
    if (ret != CL_SUCCESS)
    {
        return;
    }

    // The property list is a sequence of (name, value) pairs terminated by 0.
    cl_queue_properties props[] = {
        CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE,
        0
    };
    // creating the command queue
    queue = clCreateCommandQueueWithProperties(context, device, props, &ret);
    if (ret != CL_SUCCESS)
    {
        return;
    }

    // Index of the buffer the kernel reads from / writes to; always 0 and 1
    // in some order.
    read = 0;
    write = 1;

    // reading the kernel source
    FILE * f = fopen("kernel.cl", "rb");
    if (f == NULL)
    {
        return;
    }
    fseek(f, 0, SEEK_END);
    const long fileSize = ftell(f);
    rewind(f);
    if (fileSize <= 0)
    {
        fclose(f);
        return;
    }
    size_t codeLength = (size_t)fileSize;
    char * code = (char *)malloc(codeLength + 1);
    if (code == NULL || fread(code, 1, codeLength, f) != codeLength)
    {
        fclose(f);
        free(code);
        return;
    }
    fclose(f);
    code[codeLength] = '\0';

    // creating the program; the host copy of the source is no longer needed
    // afterwards, whether or not creation succeeded.
    const char * source = code;
    program = clCreateProgramWithSource(context, 1, &source, &codeLength, &ret);
    free(code);
    if (ret != CL_SUCCESS)
    {
        return;
    }

    // building the program (checked on its own so a build failure is not
    // hidden by the result of the next call)
    ret = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (ret != CL_SUCCESS)
    {
        return;
    }

    // creating the kernel
    kernel = clCreateKernel(program, "impactManager", &ret);
    if (ret != CL_SUCCESS)
    {
        return;
    }

    // One work-group per compute unit; never less than one work-item.
    cl_uint numGroups = 0;
    ret = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numGroups), &numGroups, NULL);
    if (ret != CL_SUCCESS || numGroups == 0)
    {
        return;
    }
    localSize = bodies / (int)numGroups;
    if (localSize < 1)
    {
        localSize = 1;
    }

    // Host-visible staging buffers for positions (incl. "stuck" flag),
    // velocities and the line constants.
    positionsCPUBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, positionsBytes, NULL, &ret);
    if (ret != CL_SUCCESS)
    {
        return;
    }
    velocitiesCPUBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, velocitiesBytes, NULL, &ret);
    if (ret != CL_SUCCESS)
    {
        return;
    }
    linesCPUBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, linesBytes, NULL, &ret);
    if (ret != CL_SUCCESS)
    {
        return;
    }

    // Device buffers: two position/velocity sets (read/write ping-pong) and
    // the line constants.
    for (int i = 0; i < 2; ++i)
    {
        positionsGPU[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, positionsBytes, NULL, &ret);
        if (ret != CL_SUCCESS)
        {
            return;
        }
        velocitiesGPU[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, velocitiesBytes, NULL, &ret);
        if (ret != CL_SUCCESS)
        {
            return;
        }
    }
    linesGPU = clCreateBuffer(context, CL_MEM_READ_WRITE, linesBytes, NULL, &ret);
    if (ret != CL_SUCCESS)
    {
        return;
    }

    // Map the staging buffers. The mapped pointers are kept local: they stop
    // being valid once the buffers are unmapped, so they must not be handed
    // back to the caller.
    float * mappedPositions = (float *)clEnqueueMapBuffer(queue, positionsCPUBuffer, CL_TRUE, CL_MAP_WRITE, 0, positionsBytes, 0, NULL, NULL, &ret);
    if (ret != CL_SUCCESS || mappedPositions == NULL)
    {
        return;
    }
    float * mappedVelocities = (float *)clEnqueueMapBuffer(queue, velocitiesCPUBuffer, CL_TRUE, CL_MAP_WRITE, 0, velocitiesBytes, 0, NULL, NULL, &ret);
    if (ret != CL_SUCCESS || mappedVelocities == NULL)
    {
        clEnqueueUnmapMemObject(queue, positionsCPUBuffer, mappedPositions, 0, NULL, NULL);
        return;
    }
    float * linesCPU = (float *)clEnqueueMapBuffer(queue, linesCPUBuffer, CL_TRUE, CL_MAP_WRITE, 0, linesBytes, 0, NULL, NULL, &ret);
    if (ret != CL_SUCCESS || linesCPU == NULL)
    {
        clEnqueueUnmapMemObject(queue, positionsCPUBuffer, mappedPositions, 0, NULL, NULL);
        clEnqueueUnmapMemObject(queue, velocitiesCPUBuffer, mappedVelocities, 0, NULL, NULL);
        return;
    }

    // initialization of the bodies' positions and velocities, and the lines
    initBodies(mappedPositions, mappedVelocities);
    initLines(twoLines, linesCPU);

    // Upload while the host pointers are still valid (blocking writes).
    cl_int uploadError = CL_SUCCESS;
    for (int i = 0; i < 2; ++i)
    {
        uploadError |= clEnqueueWriteBuffer(queue, positionsGPU[i], CL_TRUE, 0, positionsBytes, mappedPositions, 0, NULL, NULL);
        uploadError |= clEnqueueWriteBuffer(queue, velocitiesGPU[i], CL_TRUE, 0, velocitiesBytes, mappedVelocities, 0, NULL, NULL);
    }
    uploadError |= clEnqueueWriteBuffer(queue, linesGPU, CL_TRUE, 0, linesBytes, linesCPU, 0, NULL, NULL);

    // Give the caller the same initial state in its own arrays.
    if (positionsCPU != NULL && *positionsCPU != NULL)
    {
        memcpy(*positionsCPU, mappedPositions, positionsBytes);
    }
    if (velocitiesCPU != NULL && *velocitiesCPU != NULL)
    {
        memcpy(*velocitiesCPU, mappedVelocities, velocitiesBytes);
    }

    // unmapping (invalidates the mapped pointers)
    uploadError |= clEnqueueUnmapMemObject(queue, positionsCPUBuffer, mappedPositions, 0, NULL, NULL);
    uploadError |= clEnqueueUnmapMemObject(queue, velocitiesCPUBuffer, mappedVelocities, 0, NULL, NULL);
    uploadError |= clEnqueueUnmapMemObject(queue, linesCPUBuffer, linesCPU, 0, NULL, NULL);
    uploadError |= clFinish(queue);
    if (uploadError != CL_SUCCESS)
    {
        return;
    }

    success = true;
}
// Packs the two line descriptions into eight consecutive floats:
// [a1, b1, R1.x, R1.y, a2, b2, R2.x, R2.y].
void FallBodyClass::initLines(IN TwoLines l, OUT float *linesCPU)
{
    float *slot = linesCPU;

    // first line
    *slot++ = l.a1;
    *slot++ = l.b1;
    *slot++ = l.R1.x;
    *slot++ = l.R1.y;

    // second line
    *slot++ = l.a2;
    *slot++ = l.b2;
    *slot++ = l.R2.x;
    *slot++ = l.R2.y;
}
// Fills both arrays with the start state of every body: a random x position,
// fixed y/z, a cleared "stuck" flag and a random downward y velocity.
void FallBodyClass::initBodies(float * positionsCPU, float * velocitiesCPU)
{
    float scale = 0.20f;

    // clear both arrays first
    memset(positionsCPU, 0, COORD_DIM * bodies * sizeof(float));
    memset(velocitiesCPU, 0, VEL_DIM * bodies * sizeof(float));

    // seed the generator from the current time
    srand((unsigned int)time(NULL));

    int body = 0;
    while (body < bodies)
    {
        float * const pos = positionsCPU + COORD_DIM * body;
        float * const vel = velocitiesCPU + VEL_DIM * body;

        pos[0] = 1.8*((rand() / (float)RAND_MAX) - 0.5); // x axis
        pos[1] = 0.9;                                    // y axis
        pos[2] = 0.0f;                                   // z axis
        pos[3] = 0.0f;                                   // stuck variable

        // only the y component gets a non-zero start velocity
        vel[0] = 0.0;
        vel[1] = -2 * (rand() / (float)RAND_MAX);
        vel[2] = 0.0;

        ++body;
    }
}
// updating the bodies' positions and velocities. Stuck is updated inside too
void FallBodyClass::update(float dt, float * positionsCPU, float * velocitiesCPU, bool & success)
{
cl_int error = CL_SUCCESS;
size_t global_work_size;
size_t local_work_size;
success = true;
if (localSize > bodies)
localSize = bodies;
local_work_size = localSize;
global_work_size = bodies;
// passing the arguments
// we write the new positions and velocities and read the previous ones
error |= clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&positionsGPU[write]);
error |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&velocitiesGPU[write]);
error |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&positionsGPU[read]);
error |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&velocitiesGPU[read]);
error |= clSetKernelArg(kernel, 4, sizeof(cl_float), (void *)&dt);
error |= clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&linesGPU);
// just swap read and write in order not to copy the arrays
int temp;
temp = write;
write = read;
read = temp;
// executing the kernel
error |= clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
// synchronization
clFinish(queue);
// asynchronously reading the updated values
error |= clEnqueueReadBuffer(queue, positionsGPU[read], CL_FALSE, 0, COORD_DIM * bodies * sizeof(float), positionsCPU, 0, NULL, NULL);
if (error != CL_SUCCESS)
{
success = false;
}
error |= clEnqueueReadBuffer(queue, velocitiesGPU[read], CL_FALSE, 0, VEL_DIM * bodies * sizeof(float), velocitiesCPU, 0, NULL, NULL);
if (error != CL_SUCCESS)
{
success = false;
}
///////////
bool toReboot = positionsCPU[3]; //fourth index of the [0] first element
//bool toReboot = false;
////////////
if (toReboot) {
positionsCPU = (float *)clEnqueueMapBuffer(queue, positionsCPUBuffer, CL_TRUE, CL_MAP_WRITE, 0, COORD_DIM * bodies * sizeof(float), 0, NULL, NULL, NULL);
velocitiesCPU = (float *)clEnqueueMapBuffer(queue, velocitiesCPUBuffer, CL_TRUE, CL_MAP_WRITE, 0, VEL_DIM * bodies * sizeof(float), 0, NULL, NULL, NULL);
initBodies(positionsCPU, velocitiesCPU);
// unmapping the pointers
clEnqueueUnmapMemObject(queue, positionsCPUBuffer, positionsCPU, 0, NULL, NULL);
clEnqueueUnmapMemObject(queue, velocitiesCPUBuffer, velocitiesCPU, 0, NULL, NULL);
//update values on GPU side
error |= clEnqueueWriteBuffer(queue, positionsGPU[read], CL_TRUE, 0, COORD_DIM * bodies * sizeof(float), positionsCPU, 0, NULL, NULL);
error |= clEnqueueWriteBuffer(queue, velocitiesGPU[read], CL_TRUE, 0, VEL_DIM * bodies * sizeof(float), velocitiesCPU, 0, NULL, NULL);
}
return;
}
// Waits for outstanding work on the queue, then releases every OpenCL object
// held by this instance.
FallBodyClass::~FallBodyClass(void)
{
    // let queued commands finish before anything is released
    clFinish(queue);

    // memory objects
    clReleaseMemObject(linesGPU);
    clReleaseMemObject(linesCPUBuffer);
    for (int slot = 0; slot < 2; ++slot)
    {
        clReleaseMemObject(velocitiesGPU[slot]);
    }
    for (int slot = 0; slot < 2; ++slot)
    {
        clReleaseMemObject(positionsGPU[slot]);
    }
    clReleaseMemObject(positionsCPUBuffer);
    clReleaseMemObject(velocitiesCPUBuffer);

    // kernel, program, queue and finally the context
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);
}
I want to capture the desktop while a stereo app is running on it.
My code
IDirect3D9 *d3d = nullptr;
IDirect3DDevice9* device = nullptr;
IDirect3DSurface9 *surfaceMono = nullptr, *surfaceLeft=nullptr,
*surfaceRight=nullptr, *surfaceFront=nullptr;
D3DDISPLAYMODE mode;
D3DPRESENT_PARAMETERS parameters;
ZeroMemory(&mode, sizeof(mode));
ZeroMemory(¶meters, sizeof(parameters));
HRESULT result=S_OK;`
d3d = Direct3DCreate9(D3D_SDK_VERSION);
result = d3d->GetAdapterDisplayMode(D3DADAPTER_DEFAULT, &mode);
parameters.Windowed = true;
parameters.BackBufferCount = D3DPRESENT_BACK_BUFFERS_MAX;
parameters.BackBufferHeight = mode.Height;
parameters.BackBufferWidth = mode.Width;
parameters.SwapEffect = D3DSWAPEFFECT_FLIP;
parameters.hDeviceWindow = NULL;
parameters.Flags = D3DPRESENTFLAG_LOCKABLE_BACKBUFFER;
result = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, NULL,D3DCREATE_HARDWARE_VERTEXPROCESSING, ¶meters, &device);
result = device->CreateOffscreenPlainSurface(mode.Width, mode.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &surfaceMono, nullptr)
result = device->CreateOffscreenPlainSurface(mode.Width, mode.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &surfaceLeft, nullptr)
result = device->CreateOffscreenPlainSurface(mode.Width, mode.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &surfaceRight, nullptr)
result = device->CreateOffscreenPlainSurface(mode.Width,mode.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &surfaceFront, nullptr)
result = device->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_MONO, &surfaceMono);
result = D3DXSaveSurfaceToFile(FilePathMono, D3DXIFF_PNG, surfaceMono, NULL, NULL);
result = device->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_LEFT, &surfaceLeft);
result = D3DXSaveSurfaceToFile(FilePathLeft, D3DXIFF_PNG, surfaceLeft, NULL, NULL);
result = device->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_RIGHT, &surfaceRight);
result = D3DXSaveSurfaceToFile(FilePathRight, D3DXIFF_PNG, surfaceRight, NULL, NULL);
result = device->GetFrontBufferData(0,surfaceFront);
result = D3DXSaveSurfaceToFile(FilePathFront, D3DXIFF_PNG, surfaceFront, NULL, NULL);`
In my output, the back buffers are empty and the front buffer contains only the left frame.
I've tried capturing frames in a loop and creating more surfaces with CreateOffscreenPlainSurface, but it didn't help.
How do I get the right frame?
(win 7,quadro,3d vision)
I am a beginner at OpenCL. I tried to run a very simple kernel that adds 1 to each value of a vector. Everything runs fine and no error code is returned (I checked the return value after each step). The source code:
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_mem memobj , resobj = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_platform_id platform_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
size_t work_units_per_kernels;
int input[10] = {1,2,3,4,5,6,7,8,9,10};
int output[10];
int length = 10 ;
FILE *fp;
char fileName[] = "/home/tuan/OpenCLPlayaround/hello.cl";
char *source_str;
size_t source_size;
/* Load the source code containing the kernel*/
fp = fopen(fileName, "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
source_str = (char*)malloc(0x100000);
source_size = fread(source_str,1,0x100000, fp);
fclose(fp);
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
std::cout<<ret<<" code"<<std::endl;
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
std::cout<<ret<<" code"<<std::endl;
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
std::cout<<ret<<" code"<<std::endl;
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
//Check Concept of memory
memobj = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,length * sizeof(int), input, &ret);
resobj = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length * sizeof(int), output, &ret);
std::cout<<ret<<" code"<<std::endl;
program = clCreateProgramWithSource(context,1,(const char**)&source_str, (const size_t*)&source_size, &ret);
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
kernel = clCreateKernel(program, "hello", &ret);
ret = clSetKernelArg(kernel,0, sizeof(memobj),(void *)&memobj);
ret = clSetKernelArg(kernel,1, sizeof(resobj),(void *)&resobj);
ret = clEnqueueTask(command_queue, kernel, 0, NULL,NULL);
ret = clEnqueueReadBuffer(command_queue, resobj, CL_TRUE, 0, length* sizeof(int),output, 0, NULL, NULL);
for (int i = 0 ; i <10 ; i++) {
std::cout<<output[i]<<" "<<std::endl;
}
return 0;
The result is somewhat bizarre, while it should be {2,3,4,5,6,7,8,9,10,11} :
2
-16777216
65535
1
-1242789408
32767
4201449
0
2
0
And my kernel :
/* Each work-item writes a[i] + 1 into b[i], where i is its global id in
   dimension 0. */
__kernel void hello(__global int* a, __global int* b)
{
    const int idx = get_global_id(0);
    const int base = 0;
    b[idx] = base + a[idx] + 1;
}
Can somebody explain why? It has been doing my head in for hours!
clEnqueueTask is equivalent to calling clEnqueueNDRangeKernel with work_dim = 1, global_work_offset = NULL, global_work_size[0] set to 1, and local_work_size[0] set to 1.
so use clEnqueueNDRangeKernel.
I am new to this rendering API and I had been doing okay until now. I have been debugging for a while and I just can't figure out why this isn't working, so I need some help.
I believe the source of the problem is here.
m_VertexShader->Release();
Because it doesn't return an HRESULT, there isn't much I can do.
Here is the code.
#include "shader.h"
// Remembers the renderer and the two shader file paths, then immediately
// creates the shader objects via Load().
ShaderProgram::ShaderProgram(Renderer& renderer, const char* vertShader, const char* pixShader)
    : m_Renderer(renderer)
    , m_VertexShaderSource(vertShader)
    , m_PixelShaderSource(pixShader)
{
    this->Load();
}
// Releases the shader objects and the input layout.
// Each pointer is checked first and cleared afterwards, so a pointer that is
// already null is skipped instead of being dereferenced.
ShaderProgram::~ShaderProgram()
{
    if (m_VertexShader != nullptr)
    {
        m_VertexShader->Release();
        m_VertexShader = nullptr;
    }
    if (m_PixelShader != nullptr)
    {
        m_PixelShader->Release();
        m_PixelShader = nullptr;
    }
    if (m_InputLayout != nullptr)
    {
        m_InputLayout->Release();
        m_InputLayout = nullptr;
    }
}
// Reads both shader files, creates the vertex and pixel shader objects and
// builds the input layout (POSITION: 2 floats, COLOR: 3 floats).
// Any failure shows a message box and terminates the process via exit(0).
// NOTE: this excerpt ends before the function's closing brace.
void ShaderProgram::Load()
{
// File contents are passed to the Create*Shader calls as shader bytecode.
vector<char> vertSource = FileReader::read_file(m_VertexShaderSource);
vector<char> fragSource = FileReader::read_file(m_PixelShaderSource);
auto resultVert = m_Renderer.getDevice()->CreateVertexShader(vertSource.data(), vertSource.size(), nullptr, &m_VertexShader);
auto resultFrag = m_Renderer.getDevice()->CreatePixelShader(fragSource.data(), fragSource.size(), nullptr, &m_PixelShader);
if (resultVert != S_OK || resultFrag != S_OK)
{
MessageBox(nullptr, "Failed to create shader!", "Error", MB_OK);
exit(0);
}
// Vertex layout: both elements come from input slot 0; COLOR is placed
// directly after POSITION (D3D11_APPEND_ALIGNED_ELEMENT).
D3D11_INPUT_ELEMENT_DESC layout[]{
{ "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "COLOR", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }
};
// The layout is validated against the vertex shader's bytecode.
auto result = m_Renderer.getDevice()->CreateInputLayout(layout, 2, vertSource.data(), vertSource.size(), &m_InputLayout);
if (result != S_OK)
{
MessageBox(nullptr, "Could not create the input layout!", "Error", MB_OK);
exit(0);
}
Here is my rendering class. By the way, I have only just added stencil and depth testing, and that could have something to do with the problem.
#include "renderer.h"
// Builds the renderer in three steps: device + swap chain, render target
// view, then depth-stencil view (which uses the back-buffer size recorded by
// createRenderTarget()).
Renderer::Renderer(Window& window)
{
    this->createDevice(window);
    this->createRenderTarget();
    this->createDepthStencil();
}
// Releases the views, the swap chain, the immediate context and finally the
// device. Every pointer is checked before use and cleared afterwards, so a
// pointer that is already null is skipped instead of being dereferenced.
Renderer::~Renderer()
{
    if (m_DepthStencilView != nullptr)
    {
        m_DepthStencilView->Release();
        m_DepthStencilView = nullptr;
    }
    if (m_RenderTargetView != nullptr)
    {
        m_RenderTargetView->Release();
        m_RenderTargetView = nullptr;
    }
    if (m_SwapChain != nullptr)
    {
        m_SwapChain->Release();
        m_SwapChain = nullptr;
    }
    if (m_DeviceContex != nullptr)
    {
        m_DeviceContex->Release();
        m_DeviceContex = nullptr;
    }
    if (m_Device != nullptr)
    {
        m_Device->Release();
        m_Device = nullptr;
    }
}
// Creates the D3D11 device, its immediate context and a single-buffered,
// windowed swap chain bound to the given window. Shows a message box and
// exits the process when creation does not return S_OK.
void Renderer::createDevice(Window& window)
{
    DXGI_SWAP_CHAIN_DESC desc = { 0 };
    desc.Windowed = true;
    desc.OutputWindow = window.getHandle();
    desc.BufferCount = 1;
    desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
    desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    desc.SampleDesc.Count = 1;

    const HRESULT hr = D3D11CreateDeviceAndSwapChain(
        nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, 0, nullptr, 0,
        D3D11_SDK_VERSION, &desc, &m_SwapChain, &m_Device, nullptr, &m_DeviceContex);

    if (hr == S_OK)
    {
        return;
    }

    MessageBox(nullptr, "Problem with creating DX11!", "Error", MB_OK);
    exit(0);
}
void Renderer::createDepthStencil()
{
D3D11_TEXTURE2D_DESC depthStencilDesc;
depthStencilDesc.Width = m_BackBuffer.Width;
depthStencilDesc.Height = m_BackBuffer.Height;
depthStencilDesc.MipLevels = 1;
depthStencilDesc.ArraySize = 1;
depthStencilDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT;
depthStencilDesc.SampleDesc.Count = 1;
depthStencilDesc.SampleDesc.Quality = 0;
depthStencilDesc.Usage = D3D11_USAGE_DEFAULT;
depthStencilDesc.BindFlags = D3D11_BIND_DEPTH_STENCIL;
depthStencilDesc.CPUAccessFlags = 0;
depthStencilDesc.MiscFlags = 0;
m_Device->CreateTexture2D(&depthStencilDesc, nullptr, &m_DepthStencilBuffer);
m_Device->CreateDepthStencilView(m_DepthStencilBuffer, nullptr, &m_DepthStencilView);
m_DepthStencilBuffer->Release();
}
void Renderer::createRenderTarget()
{
ID3D11Texture2D* backBuffer;
auto result = m_SwapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&backBuffer);
if (result != S_OK)
{
MessageBox(nullptr, "Failed to get the swap chain buffer!", "Error", MB_OK);
exit(0);
}
m_Device->CreateRenderTargetView(backBuffer, nullptr, &m_RenderTargetView);
backBuffer->GetDesc(&m_BackBuffer);
backBuffer->Release();
}
void Renderer::beginFrame()
{
m_DeviceContex->OMSetRenderTargets(1, &m_RenderTargetView, m_DepthStencilView);
auto viewport = CD3D11_VIEWPORT(0.0f, 0.0f, (float) m_BackBuffer.Width, (float) m_BackBuffer.Height);
viewport.MinDepth = 0.0f;
viewport.MaxDepth = 1.0f;
m_DeviceContex->RSSetViewports(1, &viewport);
float clearColor[] = { 0.25f, 0.75f, 0.8f, 1.0f };
m_DeviceContex->ClearRenderTargetView(m_RenderTargetView, clearColor);
m_DeviceContex->ClearDepthStencilView(m_DepthStencilView, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.0f, 0);
}
//Swaps the buffer!
void Renderer::endFrame()
{
m_SwapChain->Present(1, 0);
}
}
Do you get the same error when you run it in non-debug mode? With the limited information you've given, it looks like your debug symbol table is not being found. If it's all fine and dandy in non-debug mode, then it's off to sift through the docs to find out how to load the symbol table. Not much of an answer, but I hope it helps a little.
You really should move to using Microsoft::WRL::ComPtr. With raw pointers, you are likely to call Release too few or too many times, which is what happened here. See this page.
It would remove silly things like the fact that you keep a dangling pointer in m_DepthStencilBuffer even after you release it. You don't need to make it a member variable at all, since you only use it to create m_DepthStencilView.
#include <wrl/client.h>
using Microsoft::WRL::ComPtr;
...
ComPtr<ID3D11Texture2D> depthStencil;
m_Device->CreateTexture2D(&depthStencilDesc, nullptr, &depthStencil);
m_Device->CreateDepthStencilView(depthStencil.Get(), nullptr, &m_DepthStencilView);
Take a look at the Direct3D 11 Game Visual Studio templates, in particular the implementation of DeviceResources.h / DeviceResources.cpp.