I am trying to write the simplest possible compute shader in DirectX 12 so that I have a starting point for a real project. However, no matter what I do, I cannot get my GPU to compute "1+1" and read back the output. Since there is very little documentation on compute shaders, I figured my only option now is to ask StackOverflow.
I wrote the following code starting from the D3D12nBodyGravity sample. First I copied over as much of the code as possible verbatim, fixed the "small" things, and once it was all working I started trimming it down to the basics. I am using Visual Studio 2019.
myClass.cpp:
#include "pch.h"
#include "myClass.h"
#include <d3dcompiler.h> // D3DReadFileToBlob
#include "Common\DirectXHelper.h" // NAME_D3D12_OBJECT
#include "Common\Device.h"
#include <iostream>
// InterlockedCompareExchange returns the object's value if the
// comparison fails. If it is already 0, then its value won't
// change and 0 will be returned.
#define InterlockedGetValue(object) InterlockedCompareExchange(object, 0, 0)
myClass::myClass()
: m_frameIndex(0)
, m_UavDescriptorSize(0)
, m_renderContextFenceValue(0)
, m_frameFenceValues{} {
std::cout << "Initializing myClass" << std::endl;
m_FenceValue = 0;
//std::cout << "Calling DXGIDeclareAdapterRemovalSupport()" << std::endl;
//DX::ThrowIfFailed(DXGIDeclareAdapterRemovalSupport());
// Identify the device
std::cout << "Identifying the device" << std::endl;
auto m_device = Device::Get().GetDevice();
std::cout << "Leading the rendering pipeline dependencies" << std::endl;
// Load the rendering pipeline dependencies.
{
std::cout << " Creating the root signatures" << std::endl;
// Create the root signatures.
{
CD3DX12_ROOT_PARAMETER rootParameter;
rootParameter.InitAsUnorderedAccessView(0);
Microsoft::WRL::ComPtr<ID3DBlob> signature;
Microsoft::WRL::ComPtr<ID3DBlob> error;
CD3DX12_ROOT_SIGNATURE_DESC computeRootSignatureDesc(1, &rootParameter, 0, nullptr);
DX::ThrowIfFailed(D3D12SerializeRootSignature(&computeRootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error));
DX::ThrowIfFailed(m_device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&m_computeRootSignature)));
}
// Describe and create the command queue.
std::cout << " Describing and creating the command queue" << std::endl;
D3D12_COMMAND_QUEUE_DESC queueDesc = {};
queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
DX::ThrowIfFailed(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&m_commandQueue)));
NAME_D3D12_OBJECT(m_commandQueue);
std::cout << " Creating descriptor heaps" << std::endl;
// Create descriptor heaps.
{
// Describe and create a shader resource view (SRV) and unordered
// access view (UAV) descriptor heap.
D3D12_DESCRIPTOR_HEAP_DESC UavHeapDesc = {};
UavHeapDesc.NumDescriptors = DescriptorCount;
UavHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
UavHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
DX::ThrowIfFailed(m_device->CreateDescriptorHeap(&UavHeapDesc, IID_PPV_ARGS(&m_UavHeap)));
NAME_D3D12_OBJECT(m_UavHeap);
m_UavDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
std::cout << " Creating a command allocator for each frame" << std::endl;
// Create a command allocator for each frame.
for (UINT n = 0; n < FrameCount; n++) {
DX::ThrowIfFailed(m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocators[n])));
}
} // Load the rendering pipeline dependencies.
std::cout << "Loading the sample assets" << std::endl;
// Load the sample assets.
{
std::cout << " Creating the pipeline states, including compiling and loading shaders" << std::endl;
// Create the pipeline states, which includes compiling and loading shaders.
{
Microsoft::WRL::ComPtr<ID3DBlob> computeShader;
#if defined(_DEBUG)
// Enable better shader debugging with the graphics debugging tools.
UINT compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
#else
UINT compileFlags = 0;
#endif
// Load and compile the compute shader.
DX::ThrowIfFailed(D3DReadFileToBlob(L"ComputeShader.cso", &computeShader));
auto convert_blob_to_byte = [](Microsoft::WRL::ComPtr<ID3DBlob> blob) {
auto* p = reinterpret_cast<unsigned char*>(blob->GetBufferPointer());
auto n = blob->GetBufferSize();
std::vector<unsigned char> buff;
buff.reserve(n);
std::copy(p, p + n, std::back_inserter(buff));
return buff;
};
std::vector<BYTE> m_computeShader = convert_blob_to_byte(computeShader);
// Describe and create the compute pipeline state object (PSO).
D3D12_COMPUTE_PIPELINE_STATE_DESC computePsoDesc = {};
computePsoDesc.pRootSignature = m_computeRootSignature.Get();
computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(computeShader.Get());
DX::ThrowIfFailed(m_device->CreateComputePipelineState(&computePsoDesc, IID_PPV_ARGS(&m_computeState)));
NAME_D3D12_OBJECT(m_computeState);
}
std::cout << " Creating the command list" << std::endl;
// Create the command list.
DX::ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocators[m_frameIndex].Get(), m_computeState.Get(), IID_PPV_ARGS(&m_commandList)));
NAME_D3D12_OBJECT(m_commandList);
std::cout << " Initializing the data in the buffers" << std::endl;
// Initialize the data in the buffers.
{
data.resize(2);
for (unsigned int i = 0; i < data.size(); i++) {
data[i] = 0.0f;
}
const UINT dataSize = data.size() * sizeof(data[0]);
D3D12_HEAP_PROPERTIES defaultHeapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT);
D3D12_HEAP_PROPERTIES uploadHeapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);
D3D12_HEAP_PROPERTIES readbackHeapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK);
D3D12_RESOURCE_DESC bufferDesc = CD3DX12_RESOURCE_DESC::Buffer(dataSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
D3D12_RESOURCE_DESC uploadBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(dataSize);
readbackBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(dataSize);
DX::ThrowIfFailed(m_device->CreateCommittedResource(
&defaultHeapProperties,
D3D12_HEAP_FLAG_NONE,
&bufferDesc,
D3D12_RESOURCE_STATE_COPY_DEST,
nullptr,
IID_PPV_ARGS(&m_dataBuffer)));
m_dataBuffer.Get()->SetName(L"m_dataBuffer");
DX::ThrowIfFailed(m_device->CreateCommittedResource(
&uploadHeapProperties,
D3D12_HEAP_FLAG_NONE,
&uploadBufferDesc,
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr,
IID_PPV_ARGS(&m_dataBufferUpload)));
m_dataBufferUpload.Get()->SetName(L"m_dataBufferUpload");
DX::ThrowIfFailed(m_device->CreateCommittedResource(
&readbackHeapProperties,
D3D12_HEAP_FLAG_NONE,
&readbackBufferDesc,
D3D12_RESOURCE_STATE_COPY_DEST,
nullptr,
IID_PPV_ARGS(&m_dataBufferReadback)));
m_dataBufferReadback.Get()->SetName(L"m_dataBufferReadback");
NAME_D3D12_OBJECT(m_dataBuffer);
dataSubResource = {};
dataSubResource.pData = &data[0];
dataSubResource.RowPitch = dataSize;
dataSubResource.SlicePitch = dataSubResource.RowPitch;
UpdateSubresources<1>(m_commandList.Get(), m_dataBuffer.Get(), m_dataBufferUpload.Get(), 0, 0, 1, &dataSubResource);
m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_dataBuffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COMMON));
m_commandList->CopyResource(m_dataBufferReadback.Get(), m_dataBufferUpload.Get());
D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
uavDesc.Format = DXGI_FORMAT_UNKNOWN;
uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
uavDesc.Buffer.FirstElement = 0;
uavDesc.Buffer.NumElements = 1;
uavDesc.Buffer.StructureByteStride = sizeof(data[0]);
uavDesc.Buffer.CounterOffsetInBytes = 0;
uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
CD3DX12_CPU_DESCRIPTOR_HANDLE uavHandle0(m_UavHeap->GetCPUDescriptorHandleForHeapStart(), Uav, m_UavDescriptorSize);
m_device->CreateUnorderedAccessView(m_dataBuffer.Get(), nullptr, &uavDesc, uavHandle0);
} // Initialize the data in the buffers.
std::cout << " Closing the command list and executing it to begind the initial GPU setup" << std::endl;
// Close the command list and execute it to begin the initial GPU setup.
DX::ThrowIfFailed(m_commandList->Close());
ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() };
m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
std::cout << " Creating synchronization objects and wait until assets have been uploaded to the GPU" << std::endl;
// Create synchronization objects and wait until assets have been uploaded to the GPU.
{
DX::ThrowIfFailed(m_device->CreateFence(m_renderContextFenceValue, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_renderContextFence)));
m_renderContextFenceValue++;
m_renderContextFenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
if (m_renderContextFenceEvent == nullptr) {
DX::ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()));
}
// Add a signal command to the queue.
DX::ThrowIfFailed(m_commandQueue->Signal(m_renderContextFence.Get(), m_renderContextFenceValue));
// Instruct the fence to set the event object when the signal command completes.
DX::ThrowIfFailed(m_renderContextFence->SetEventOnCompletion(m_renderContextFenceValue, m_renderContextFenceEvent));
m_renderContextFenceValue++;
// Wait until the signal command has been processed.
WaitForSingleObject(m_renderContextFenceEvent, INFINITE);
}
} // Load the sample assets.
std::cout << "Creating compute resources" << std::endl;
{
// Create compute resources.
D3D12_COMMAND_QUEUE_DESC queueDesc = { D3D12_COMMAND_LIST_TYPE_COMPUTE, 0, D3D12_COMMAND_QUEUE_FLAG_NONE };
DX::ThrowIfFailed(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&m_computeCommandQueue)));
DX::ThrowIfFailed(m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE, IID_PPV_ARGS(&m_computeAllocator)));
DX::ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE, m_computeAllocator.Get(), nullptr, IID_PPV_ARGS(&m_computeCommandList)));
DX::ThrowIfFailed(m_device->CreateFence(0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&m_Fence)));
m_FenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
if (m_FenceEvent == nullptr) {
DX::ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()));
}
}
std::cout << "Calculating" << std::endl;
Calculate();
std::cout << "Finished" << std::endl;
}
void myClass::Calculate() {
m_computeCommandList.Get()->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_dataBuffer.Get(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_UNORDERED_ACCESS));
m_computeCommandList.Get()->SetPipelineState(m_computeState.Get());
m_computeCommandList.Get()->SetComputeRootSignature(m_computeRootSignature.Get());
ID3D12DescriptorHeap* ppHeaps[] = { m_UavHeap.Get() };
m_computeCommandList.Get()->SetDescriptorHeaps(_countof(ppHeaps), ppHeaps);
CD3DX12_GPU_DESCRIPTOR_HANDLE uavHandle(m_UavHeap->GetGPUDescriptorHandleForHeapStart(), Uav, m_UavDescriptorSize);
m_computeCommandList.Get()->SetComputeRootUnorderedAccessView(ComputeRootUAVTable, m_dataBuffer->GetGPUVirtualAddress());
m_computeCommandList.Get()->Dispatch(1, 1, 1);
m_computeCommandList.Get()->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_dataBuffer.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COMMON));
// Close and execute the command list.
DX::ThrowIfFailed(m_computeCommandList.Get()->Close());
ID3D12CommandList* commandLists[] = { m_computeCommandList.Get() };
m_computeCommandQueue->ExecuteCommandLists(1, commandLists);
// Wait for the compute shader to complete the calculation.
UINT64 FenceValue = InterlockedIncrement(&m_FenceValue);
DX::ThrowIfFailed(m_computeCommandQueue.Get()->Signal(m_Fence.Get(), FenceValue));
DX::ThrowIfFailed(m_Fence.Get()->SetEventOnCompletion(FenceValue, m_FenceEvent));
WaitForSingleObject(m_FenceEvent, INFINITE);
std::cout << "FenceValue = " << FenceValue << " " << m_FenceValue << " " << m_Fence.Get()->GetCompletedValue() << std::endl;
// Check the output!
float* dataptr = nullptr;
D3D12_RANGE range = { 0, readbackBufferDesc.Width };
DX::ThrowIfFailed(m_dataBufferReadback->Map(0, &range, (void**)&dataptr));
for (int i = 0; i < readbackBufferDesc.Width / sizeof(data[0]); i++)
printf("uav[%d] = %.2f\n", i, dataptr[i]);
m_dataBufferReadback->Unmap(0, nullptr);
for (unsigned int i = 0; i < data.size(); i++) {
std::cout << "data[" << i << "] = " << data[i] << std::endl;
}
}
myClass.h:
#pragma once
#include "Common\Device.h"
#include <iostream>
// We have to write all of this as its own class, otherwise we cannot
// use the "this" pointer when we create compute resources. We need to
// do that because this code targets multithreading.
class myClass {
public:
myClass();
private:
// Two buffers full of data are used. The compute thread alternates
// writing to each of them. The render thread renders using the
// buffer that is not currently in use by the compute shader.
//struct Data {
// float c;
//};
//std::vector<Data> data;
std::vector<float> data;
// For the compute pipeline, the CBV is a struct containing some
// constants used in the compute shader.
struct ConstantBufferCS {
float a;
float b;
};
D3D12_SUBRESOURCE_DATA dataSubResource;
static const UINT FrameCount = 1;
//static const UINT ThreadCount = 1;
UINT m_heightInstances;
UINT m_widthInstances;
UINT m_frameIndex;
Microsoft::WRL::ComPtr<ID3D12RootSignature> m_rootSignature;
Microsoft::WRL::ComPtr<ID3D12RootSignature> m_computeRootSignature;
Microsoft::WRL::ComPtr<ID3D12CommandQueue> m_commandQueue;
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> m_UavHeap;
Microsoft::WRL::ComPtr<ID3D12CommandAllocator> m_commandAllocators[FrameCount];
Microsoft::WRL::ComPtr<ID3D12PipelineState> m_computeState;
Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> m_commandList;
Microsoft::WRL::ComPtr<ID3D12Resource> m_constantBufferCS;
UINT64 m_renderContextFenceValue;
HANDLE m_renderContextFenceEvent;
UINT64 m_frameFenceValues[FrameCount];
UINT m_UavDescriptorSize;
ConstantBufferCS constantBufferCS;
Microsoft::WRL::ComPtr<ID3D12Resource> constantBufferCSUpload;
Microsoft::WRL::ComPtr<ID3D12Fence> m_renderContextFence;
Microsoft::WRL::ComPtr<ID3D12Resource> m_dataBuffer;
Microsoft::WRL::ComPtr<ID3D12Resource> m_dataBufferUpload;
Microsoft::WRL::ComPtr<ID3D12Resource> m_dataBufferReadback;
// Compute objects.
Microsoft::WRL::ComPtr<ID3D12CommandAllocator> m_computeAllocator;
Microsoft::WRL::ComPtr<ID3D12CommandQueue> m_computeCommandQueue;
Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> m_computeCommandList;
Microsoft::WRL::ComPtr<ID3D12Fence> m_Fence;
volatile HANDLE m_FenceEvent;
D3D12_RESOURCE_DESC readbackBufferDesc;
// State
UINT64 volatile m_FenceValue;
/*
struct ThreadData {
myClass* pContext;
UINT threadIndex;
};
ThreadData m_threadData;
HANDLE m_threadHandles;
*/
void Calculate();
// Indices of shader resources in the descriptor heap.
enum DescriptorHeapIndex : UINT32 {
Uav = 0,
DescriptorCount = 1
};
enum ComputeRootParameters : UINT32 {
//ComputeRootCBV = 0,
ComputeRootUAVTable = 0,
ComputeRootParametersCount
};
};
Device.cpp:
#pragma once
#include "pch.h"
#include "Device.h"
#include "DirectXHelper.h"
#include <cassert> // for "assert"
#include <iostream>
static Device* gs_pSingelton = nullptr;
// Constructor
Device::Device(HINSTANCE hInst, bool useWarp)
: m_hInstance(hInst)
, m_useWarp(useWarp)
{
}
void Device::Initialize() {
#if defined(_DEBUG)
// Always enable the debug layer before doing anything DX12 related
// so all possible errors generated while creating DX12 objects
// are caught by the debug layer.
Microsoft::WRL::ComPtr<ID3D12Debug1> debugInterface;
DX::ThrowIfFailed(D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)));
debugInterface->EnableDebugLayer();
// Enable these if you want full validation (will slow down rendering a lot).
//debugInterface->SetEnableGPUBasedValidation(TRUE);
//debugInterface->SetEnableSynchronizedCommandQueueValidation(TRUE);
#endif
auto dxgiAdapter = GetAdapter(false);
if (!dxgiAdapter) { // If no supporting DX12 adapters exist, fall back to WARP
dxgiAdapter = GetAdapter(true);
}
if (dxgiAdapter) {
m_device = CreateDevice(dxgiAdapter);
}
else {
throw std::exception("DXGI adapter enumeration failed.");
}
}
void Device::Create(HINSTANCE hInst) {
if (!gs_pSingelton) {
gs_pSingelton = new Device(hInst);
gs_pSingelton->Initialize();
}
}
Device& Device::Get() {
assert(gs_pSingelton);
return *gs_pSingelton;
}
void Device::Destroy() {
if (gs_pSingelton) {
delete gs_pSingelton;
gs_pSingelton = nullptr;
}
}
// Destructor
Device::~Device() {
}
Microsoft::WRL::ComPtr<ID3D12Device2> Device::CreateDevice(Microsoft::WRL::ComPtr<IDXGIAdapter4> adapter) {
Microsoft::WRL::ComPtr<ID3D12Device2> d3d12Device2;
DX::ThrowIfFailed(D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&d3d12Device2)));
// Enable debug messages in debug mode.
#if defined(_DEBUG)
Microsoft::WRL::ComPtr<ID3D12InfoQueue> pInfoQueue;
if (SUCCEEDED(d3d12Device2.As(&pInfoQueue))) {
pInfoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE);
pInfoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE);
pInfoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE);
// Suppress whole categories of messages
//D3D12_MESSAGE_CATEGORY Categories[] = {};
// Suppress messages based on their severity level
D3D12_MESSAGE_SEVERITY Severities[] = { D3D12_MESSAGE_SEVERITY_INFO };
// Suppress individual messages by their ID
D3D12_MESSAGE_ID DenyIds[] = {
D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, // I'm really not sure how to avoid this message.
D3D12_MESSAGE_ID_MAP_INVALID_NULLRANGE, // This warning occurs when using capture frame while graphics debugging.
D3D12_MESSAGE_ID_UNMAP_INVALID_NULLRANGE, // This warning occurs when using capture frame while graphics debugging.
};
D3D12_INFO_QUEUE_FILTER NewFilter = {};
//NewFilter.DenyList.NumCategories = _countof(Categories);
//NewFilter.DenyList.pCategoryList = Categories;
NewFilter.DenyList.NumSeverities = _countof(Severities);
NewFilter.DenyList.pSeverityList = Severities;
NewFilter.DenyList.NumIDs = _countof(DenyIds);
NewFilter.DenyList.pIDList = DenyIds;
DX::ThrowIfFailed(pInfoQueue->PushStorageFilter(&NewFilter));
}
#endif
return d3d12Device2;
}
Microsoft::WRL::ComPtr<IDXGIAdapter4> Device::GetAdapter(bool useWarp) {
UINT createFactoryFlags = 0;
#if defined(_DEBUG)
createFactoryFlags = DXGI_CREATE_FACTORY_DEBUG;
#endif
DX::ThrowIfFailed(CreateDXGIFactory2(createFactoryFlags, IID_PPV_ARGS(&m_factory)));
Microsoft::WRL::ComPtr<IDXGIAdapter1> dxgiAdapter1;
Microsoft::WRL::ComPtr<IDXGIAdapter4> dxgiAdapter4;
if (useWarp) {
DX::ThrowIfFailed(m_factory->EnumWarpAdapter(IID_PPV_ARGS(&dxgiAdapter1)));
DX::ThrowIfFailed(dxgiAdapter1.As(&dxgiAdapter4));
}
else {
SIZE_T maxDedicatedVideoMemory = 0;
for (UINT i = 0; m_factory->EnumAdapters1(i, &dxgiAdapter1) != DXGI_ERROR_NOT_FOUND; ++i) {
DXGI_ADAPTER_DESC1 dxgiAdapterDesc1;
dxgiAdapter1->GetDesc1(&dxgiAdapterDesc1);
// Check to see if the adapter can create a D3D12 device without actually
// creating it. The adapter with the largest dedicated video memory
// is favored.
if ((dxgiAdapterDesc1.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) == 0 &&
SUCCEEDED(D3D12CreateDevice(dxgiAdapter1.Get(),
D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device), nullptr)) &&
dxgiAdapterDesc1.DedicatedVideoMemory > maxDedicatedVideoMemory) {
maxDedicatedVideoMemory = dxgiAdapterDesc1.DedicatedVideoMemory;
DX::ThrowIfFailed(dxgiAdapter1.As(&dxgiAdapter4));
}
}
}
return dxgiAdapter4;
}
Device.h:
#pragma once
#include <dxgi1_6.h> // IDXGIAdapter4
// We require this file because we cannot pass the device pointer everywhere we need it.
class Device {
public:
/**
* Create the device singleton with the device instance handle.
*/
static void Create(HINSTANCE hInst);
/**
* Destroy the device instance.
*/
static void Destroy();
/**
* Get the device singleton.
*/
static Device& Get();
/**
* Get the Direct3D 12 device
*/
Microsoft::WRL::ComPtr<ID3D12Device2> GetDevice() const { return m_device; }
Microsoft::WRL::ComPtr<IDXGIFactory4> GetFactory() const { return m_factory; }
protected:
// Create a device instance
Device(HINSTANCE hInst, bool useWarp = false);
// Destroy the device instance.
virtual ~Device();
// Initialize the device instance.
void Initialize();
Microsoft::WRL::ComPtr<IDXGIAdapter4> GetAdapter(bool useWarp);
Microsoft::WRL::ComPtr<ID3D12Device2> CreateDevice(Microsoft::WRL::ComPtr<IDXGIAdapter4> adapter);
private:
Device(const Device& copy) = delete;
Device& operator=(const Device& other) = delete;
HINSTANCE m_hInstance;
Microsoft::WRL::ComPtr<ID3D12Device2> m_device;
Microsoft::WRL::ComPtr<IDXGIFactory4> m_factory;
bool m_useWarp;
};
ComputeShader.hlsl:
RWStructuredBuffer<float> output : register(u0); // UAV
[numthreads(1, 1, 1)]
void main( uint3 DTid : SV_DispatchThreadID ) {
output[DTid.x] = 1 + 1;
}
Please let me know if you can spot what I am missing. I can also try uploading my project to GitHub if that helps... SOS :(
I'm playing around with some code in a project; specifically, a function that takes SVG images and makes PNGs out of them.
I have this:
typedef std::vector<uint8_t> BinaryBuffer;
BinaryBuffer readFile(fs::path const& path) {
BinaryBuffer ret;
fs::ifstream f(path, std::ios::binary);
f.seekg(0, std::ios::end);
ret.resize(f.tellg());
f.seekg(0);
f.read(reinterpret_cast<char*>(ret.data()), ret.size());
if (!f) throw std::runtime_error("File cannot be read: " + path.string());
return ret;
}
void loadSVG(Bitmap& bitmap, fs::path const& filename) {
double factor = config["graphic/svg_lod"].f();
// Try to load a cached PNG instead
if (cache::loadSVG(bitmap, filename, factor)) return;
std::clog << "image/debug: Loading SVG: " + filename.string() << std::endl;
// Open the SVG file in librsvg
#if !GLIB_CHECK_VERSION(2, 36, 0) // Avoid deprecation warnings
g_type_init();
#endif
GError* pError = nullptr;
std::shared_ptr<RsvgHandle> svgHandle(rsvg_handle_new_with_flags(RSVG_HANDLE_FLAG_KEEP_IMAGE_DATA), g_object_unref);
rsvg_handle_set_base_uri(svgHandle.get(),filename.string().c_str());
BinaryBuffer data = readFile(filename);
std::clog << "svg/debug: svg data size is: " << data.size() << std::endl;
gboolean result = rsvg_handle_write(svgHandle.get(), data.data(), data.size(), &pError);
// rsvg_handle_new_from_file(filename.string().c_str(), &pError)
if (result != TRUE) {
g_error_free(pError);
throw std::runtime_error("Unable to load " + filename.string());
}
else {
std::clog << "svg/debug: SVG loaded succesfully." << std::endl;
}
// Get SVG dimensions
RsvgDimensionData svgDimension;
rsvg_handle_get_dimensions(svgHandle.get(), &svgDimension);
// Prepare the pixel buffer
std::clog << "svg/debug: svg width is: " << svgDimension.width << ", and height: " << svgDimension.height << std::endl;
bitmap.resize(svgDimension.width*factor, svgDimension.height*factor);
bitmap.fmt = pix::INT_ARGB;
bitmap.linearPremul = true;
// Raster with Cairo
std::shared_ptr<cairo_surface_t> surface(
cairo_image_surface_create_for_data(&bitmap.buf[0], CAIRO_FORMAT_ARGB32, bitmap.width, bitmap.height, bitmap.width * 4),
cairo_surface_destroy);
std::shared_ptr<cairo_t> dc(cairo_create(surface.get()), cairo_destroy);
cairo_scale(dc.get(), factor, factor);
gboolean renderRes = TRUE;
renderRes = rsvg_handle_render_cairo(svgHandle.get(), dc.get());
if (renderRes != TRUE) {
throw std::runtime_error("Unable to render " + filename.string());
}
// Change byte order from BGRA to RGBA
for (uint32_t *ptr = reinterpret_cast<uint32_t*>(&*bitmap.buf.begin()), *end = ptr + bitmap.buf.size() / 4; ptr < end; ++ptr) {
uint8_t* pixel = reinterpret_cast<uint8_t*>(ptr);
uint8_t r = pixel[2], g = pixel[1], b = pixel[0], a = pixel[3];
pixel[0] = r; pixel[1] = g; pixel[2] = b; pixel[3] = a;
}
bitmap.fmt = pix::CHAR_RGBA;
// Write to cache so that it can be loaded faster the next time
fs::path cache_filename = cache::constructSVGCacheFileName(filename, factor);
fs::create_directories(cache_filename.parent_path());
writePNG(cache_filename, bitmap);
}
But it fails in rsvg_handle_render_cairo... I don't know why. A previous version of the function using rsvg_handle_new_from_file (which doesn't use the BinaryBuffer struct) worked fine. Note, however, that essentially the same struct and readFile function are used elsewhere without any issues. From the debug messages I've dropped in there I can see that the file is indeed getting read, and I am also getting the correct dimensions from my SVG file. There don't seem to be any errors prior to the call to the render function (so I'd assume it's parsed OK), but maybe not?
The answer was ridiculously simple. I was missing rsvg_handle_close(svgHandle.get(), &pError);
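For reference, a minimal sketch of where the missing call belongs, using the same names as the code above (the second error message here is just illustrative, not from the original):
gboolean result = rsvg_handle_write(svgHandle.get(), data.data(), data.size(), &pError);
if (result != TRUE) {
    g_error_free(pError);
    throw std::runtime_error("Unable to load " + filename.string());
}
// Closing the handle tells librsvg that the whole document has been written,
// so it can finish parsing; without it rsvg_handle_render_cairo has nothing
// complete to render.
if (rsvg_handle_close(svgHandle.get(), &pError) != TRUE) {
    g_error_free(pError);
    throw std::runtime_error("Unable to finish parsing " + filename.string());
}
// ... then rsvg_handle_get_dimensions / rsvg_handle_render_cairo as before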
I'm using a RealSense D435 camera.
I installed the SDK 2.0 full package and upgraded the camera firmware from 5.1 to 5.9 (the latest version).
I want to write code that gets the color image and the depth image, using Visual Studio 2015.
So I wrote this:
#include <iostream>
#include "pxcsession.h"
#include "pxcprojection.h"
#include "pxcsensemanager.h"
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <Windows.h>
#pragma comment(lib, "winmm.lib")
using namespace cv;
using namespace std;
class RealSenseAsenseManager
{
public:
~RealSenseAsenseManager()
{
if (senseManager != 0) {
senseManager->Release();
}
}
void initialize()
{
senseManager = PXCSenseManager::CreateInstance();
if (senseManager == nullptr) {
throw std::runtime_error("SenseManager failed");
}
pxcStatus sts = senseManager->EnableStream(
PXCCapture::StreamType::STREAM_TYPE_DEPTH,
DEPTH_WIDTH, DEPTH_HEIGHT, DEPTH_FPS);
if (sts < PXC_STATUS_NO_ERROR) {
throw std::runtime_error("Depth stream activation failed");
}
sts = senseManager->Init();
if (sts < PXC_STATUS_NO_ERROR) {
throw std::runtime_error("Pipeline Initialzation failed");
}
senseManager->QueryCaptureManager()->QueryDevice()->SetMirrorMode(
PXCCapture::Device::MirrorMode::MIRROR_MODE_HORIZONTAL);
}
void run()
{
while (1) {
updateFrame();
auto ret = showImage();
if (!ret) {
break;
}
}
}
private:
void updateFrame()
{
pxcStatus sts = senseManager->AcquireFrame(false);
if (sts < PXC_STATUS_NO_ERROR) {
return;
}
const PXCCapture::Sample *sample = senseManager->QuerySample();
if (sample) {
updateDepthImage(sample->depth);
}
senseManager->ReleaseFrame();
}
void updateDepthImage(PXCImage* depthFrame)
{
if (depthFrame == 0) {
return;
}
PXCImage::ImageData data;
pxcStatus sts = depthFrame->AcquireAccess(
PXCImage::Access::ACCESS_READ,
PXCImage::PixelFormat::PIXEL_FORMAT_RGB32, &data);
if (sts < PXC_STATUS_NO_ERROR) {
throw std::runtime_error("Taking Depth image failed");
}
PXCImage::ImageInfo info = depthFrame->QueryInfo();
depthImage = cv::Mat(info.height, info.width, CV_8UC4);
memcpy(depthImage.data, data.planes[0], data.pitches[0] * info.height);
depthFrame->ReleaseAccess(&data);
}
bool showImage()
{
if (depthImage.rows == 0 || (depthImage.cols == 0)) {
return true;
}
cv::imshow("Depth Image", depthImage);
int c = cv::waitKey(10);
if ((c == 27) || (c == 'q') || (c == 'Q')) {
// ESC|q|Q for Exit
return false;
}
return true;
}
private:
cv::Mat depthImage;
PXCSenseManager *senseManager = 0;
const int DEPTH_WIDTH = 640;
const int DEPTH_HEIGHT = 480;
const int DEPTH_FPS = 30.0f;
};
void main()
{
try {
RealSenseAsenseManager deep;
deep.initialize();
deep.run();
}
catch (std::exception& ex) {
std::cout << ex.what() << std::endl;
}
}
But this error appears.
sts = senseManager->Init();
if (sts < PXC_STATUS_NO_ERROR) {
throw std::runtime_error("Pipeline Initialzation failed");
}
Pipeline Initialization failed <-
I don't know how to solve this problem.
The depth camera connection is unlikely to be the issue: the color image is displayed, and only the depth stream is unavailable.
How can I solve this problem?
Thank you for reading my question.
The D400 series cameras aren't compatible with the old RealSense SDK, only with the new librealsense SDK, available here: https://github.com/IntelRealSense/librealsense.
A sample showing how to get the colour and depth images streaming is here: https://github.com/IntelRealSense/librealsense/tree/master/examples/capture
You can start by using one of the provided examples.
The code below configures the camera and renders Depth & RGB data:
(the example.hpp header is located in the main repo /examples dir)
#include <librealsense2/rs.hpp> // Include RealSense Cross Platform API
#include "example.hpp" // Include short list of convenience functions for rendering
// Capture Example demonstrates how to
// capture depth and color video streams and render them to the screen
int main(int argc, char * argv[]) try
{
rs2::log_to_console(RS2_LOG_SEVERITY_ERROR);
// Create a simple OpenGL window for rendering:
window app(1280, 720, "RealSense Capture Example");
// Declare two textures on the GPU, one for color and one for depth
texture depth_image, color_image;
// Declare depth colorizer for pretty visualization of depth data
rs2::colorizer color_map;
// Declare RealSense pipeline, encapsulating the actual device and sensors
rs2::pipeline pipe;
// Start streaming with default recommended configuration
pipe.start();
while(app) // Application still alive?
{
rs2::frameset data = pipe.wait_for_frames(); // Wait for next set of frames from the camera
rs2::frame depth = color_map(data.get_depth_frame()); // Find and colorize the depth data
rs2::frame color = data.get_color_frame(); // Find the color data
// For cameras that don't have RGB sensor, we'll render infrared frames instead of color
if (!color)
color = data.get_infrared_frame();
// Render depth on to the first half of the screen and color on to the second
depth_image.render(depth, { 0, 0, app.width() / 2, app.height() });
color_image.render(color, { app.width() / 2, 0, app.width() / 2, app.height() });
}
return EXIT_SUCCESS;
}
catch (const rs2::error & e)
{
std::cerr << "RealSense error calling " << e.get_failed_function() << "(" << e.get_failed_args() << "):\n " << e.what() << std::endl;
return EXIT_FAILURE;
}
catch (const std::exception& e)
{
std::cerr << e.what() << std::endl;
return EXIT_FAILURE;
}
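If you would rather get the frames into OpenCV, as in your original code, instead of rendering them with the example.hpp helpers, something along these lines should work. This is only a sketch assuming the standard librealsense2 pipeline API plus OpenCV; the divisor used to scale the 16-bit depth values down to 8 bits for display is an arbitrary choice, not an SDK constant:
#include <librealsense2/rs.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <iostream>
int main() try {
    rs2::pipeline pipe;
    pipe.start(); // default configuration streams depth (Z16) and color
    while (true) {
        rs2::frameset frames = pipe.wait_for_frames();
        rs2::depth_frame depth = frames.get_depth_frame();
        // Wrap the raw 16-bit depth data in a cv::Mat without copying it.
        cv::Mat depthRaw(cv::Size(depth.get_width(), depth.get_height()),
                         CV_16UC1, (void*)depth.get_data(), cv::Mat::AUTO_STEP);
        // Scale down to 8 bits so imshow produces something visible.
        cv::Mat depth8;
        depthRaw.convertTo(depth8, CV_8UC1, 255.0 / 10000.0);
        cv::imshow("Depth Image", depth8);
        int c = cv::waitKey(10);
        if ((c == 27) || (c == 'q') || (c == 'Q')) break;
    }
    return 0;
}
catch (const rs2::error& e) {
    std::cerr << e.what() << std::endl;
    return 1;
}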
I am starting with SDL, and I was reading the introduction and trying the drawPixel method it describes. What I am doing is a PPM viewer. So far I have the RGB values in an array, correctly stored (I checked them by printing the array and making sure they correspond to their position in the PPM file), and I want to use SDL to draw the picture. This is the code I've written so far (this is the main.cpp file; if ppm.hpp and ppm.cpp are needed please tell me so I can add them):
#include <iostream>
#include <SDL/SDL.h>
#include "ppm.hpp"
using namespace std;
void drawPixel (SDL_Surface*, Uint8, Uint8, Uint8, int, int);
int main (int argc, char** argv) {
PPM ppm ("res/cake.ppm");
if (SDL_Init(SDL_INIT_AUDIO | SDL_INIT_VIDEO) < 0) {
cerr << "Unable to init SDL: " << SDL_GetError() << endl;
exit(1);
}
atexit(SDL_Quit); // to automatically call SDL_Quit() when the program terminates
SDL_Surface* screen;
screen = SDL_SetVideoMode(ppm.width(), ppm.height(), 32, SDL_SWSURFACE);
if (screen == nullptr) {
cerr << "Unable to set " << ppm.width() << "x" << ppm.height() << " video: " << SDL_GetError() << endl;
exit(1);
}
for (int i = 0; i < ppm.width(); i++) {
for(int j = 0; j < ppm.height(); j++) {
drawPixel(screen, ppm.red(i,j), ppm.green(i,j), ppm.blue(i,j), i, j);
}
}
return 0;
}
void drawPixel (SDL_Surface* screen, Uint8 R, Uint8 G, Uint8 B, int x, int y) {
Uint32 color = SDL_MapRGB(screen->format, R, G, B);
if (SDL_MUSTLOCK(screen)) {
if (SDL_LockSurface(screen) < 0) {
return;
}
}
switch (screen->format->BytesPerPixel) {
case 1: { // Assuming 8-bpp
Uint8* bufp;
bufp = (Uint8*)screen->pixels + y * screen->pitch + x;
*bufp = color;
}
break;
case 2: { // Probably 15-bpp or 16-bpp
Uint16 *bufp;
bufp = (Uint16*)screen->pixels + y * screen->pitch / 2 + x;
*bufp = color;
}
break;
case 3: { // Slow 24-bpp mode, usually not used
Uint8* bufp;
bufp = (Uint8*)screen->pixels + y * screen->pitch + x;
*(bufp + screen->format->Rshift / 8) = R;
*(bufp + screen->format->Gshift / 8) = G;
*(bufp + screen->format->Bshift / 8) = B;
}
break;
case 4: { // Probably 32-bpp
Uint32* bufp;
bufp = (Uint32*)screen->pixels + y * screen->pitch / 4 + x;
*bufp = color;
}
break;
}
if (SDL_MUSTLOCK(screen)) {
SDL_UnlockSurface(screen);
}
SDL_UpdateRect(screen, x, y, 1, 1);
}
drawPixel is as provided by the introduction. The PPM file I am trying to use is called cake.ppm and is 720x540; however, when I build and run this code, the application stops responding. I tried it on a smaller PPM file, 426x299, and it showed a window with colors being drawn on it.
Why does it not work on the cake.ppm file while it works on others? Is it due to the size?
When I try the second PPM file (426x299) or other PPM files, the colors come out totally different. Why is that?
When I run the app, the window closes right after the pixels are drawn. How can I keep it open?
Attempting a file called squares.ppm, here is what it should look like:
But this is what I'm getting:
I am writing a new framework for a game engine, but I'm stuck on one issue: OpenAL.
I usually use freealut for this, but I can't find it anywhere; the only site that hosted it is offline, and I don't have any copies of it. I even had to dissect some other guy's project to find openal32.lib. Either my google fu has grown weak, or the vast Internet really doesn't have any copies of it.
I found some example code showing how to use OpenAL without the freealut framework, but I can't get it to load multiple files, so I either have to find out why it's not working, or somehow locate freealut. I found some sources for it on GitHub, but at the moment, building freealut from source is out of the question.
I am using Visual C++ 2010 Express as the IDE.
I modified the code I found into this:
Basically it's three commands to load, play and delete the sound files.
It works fine for one sound file, but when I try to load more, it stops working.
#include "AudioLib.h"
#include <iostream>
#include <cstdlib>
#include <Windows.h>
#include <map>
#include <vector>
#include <AL\al.h>
#include <AL\alc.h>
using namespace std;
typedef map <const char *, ALuint > MapType;
MapType soundsbuffer;
MapType soundssource;
int endWithError(char* msg, int error=0)
{
//Display error message in console
cout << msg << "\n";
//system("PAUSE");
return error;
}
vector<const char *> soundslist;
ALCdevice *device;
ALCcontext *context;
int loadSound(const char * input) {
FILE *fp;
unsigned char* buf;
ALuint source;
ALuint buffer;
fp = NULL;
fp = fopen(input,"rb");
char type[4];
DWORD size,chunkSize;
short formatType,channels;
DWORD sampleRate, avgBytesPerSec;
short bytesPerSample, bitsPerSample;
DWORD dataSize;
//Check that the WAVE file is OK
fread(type,sizeof(char),4,fp); //Reads the first bytes in the file
if(type[0]!='R' || type[1]!='I' || type[2]!='F' || type[3]!='F') //Should be "RIFF"
return endWithError ("No RIFF"); //Not RIFF
fread(&size, sizeof(DWORD),1,fp); //Continue to read the file
fread(type, sizeof(char),4,fp); //Continue to read the file
if (type[0]!='W' || type[1]!='A' || type[2]!='V' || type[3]!='E') //This part should be "WAVE"
return endWithError("not WAVE"); //Not WAVE
fread(type,sizeof(char),4,fp); //Continue to read the file
if (type[0]!='f' || type[1]!='m' || type[2]!='t' || type[3]!=' ') //This part should be "fmt "
return endWithError("not fmt "); //Not fmt
//Now we know that the file is a acceptable WAVE file
//Info about the WAVE data is now read and stored
fread(&chunkSize,sizeof(DWORD),1,fp);
fread(&formatType,sizeof(short),1,fp);
fread(&channels,sizeof(short),1,fp);
fread(&sampleRate,sizeof(DWORD),1,fp);
fread(&avgBytesPerSec,sizeof(DWORD),1,fp);
fread(&bytesPerSample,sizeof(short),1,fp);
fread(&bitsPerSample,sizeof(short),1,fp);
fread(type,sizeof(char),4,fp);
if (type[0]!='d' || type[1]!='a' || type[2]!='t' || type[3]!='a') //This part should be "data"
return endWithError("Missing DATA"); //not data
fread(&dataSize,sizeof(DWORD),1,fp); //The size of the sound data is read
//Display the info about the WAVE file
cout << "Chunk Size: " << chunkSize << "\n";
cout << "Format Type: " << formatType << "\n";
cout << "Channels: " << channels << "\n";
cout << "Sample Rate: " << sampleRate << "\n";
cout << "Average Bytes Per Second: " << avgBytesPerSec << "\n";
cout << "Bytes Per Sample: " << bytesPerSample << "\n";
cout << "Bits Per Sample: " << bitsPerSample << "\n";
cout << "Data Size: " << dataSize << "\n";
buf= new unsigned char[dataSize]; //Allocate memory for the sound data
cout << fread(buf,sizeof(BYTE),dataSize,fp) << " bytes loaded\n"; //Read the sound data and display the
//number of bytes loaded.
//Should be the same as the Data Size if OK
//Now OpenAL needs to be initialized
//And an OpenAL Context
device = alcOpenDevice(NULL); //Open the device
if(!device) return endWithError("no sound device"); //Error during device opening
context = alcCreateContext(device, NULL); //Give the device a context
alcMakeContextCurrent(context); //Make the context the current
if(!context) return endWithError("no sound context"); //Error during context handling
//Stores the sound data
ALuint frequency=sampleRate; //The Sample Rate of the WAVE file
ALenum format=0; //The audio format (bits per sample, number of channels)
alGenBuffers(1, &buffer); //Generate one OpenAL Buffer and link to "buffer"
alGenSources(1, &source); //Generate one OpenAL Source and link to "source"
if(alGetError() != AL_NO_ERROR) return endWithError("Error GenSource"); //Error during buffer/source generation
//Figure out the format of the WAVE file
if(bitsPerSample == 8)
{
if(channels == 1)
format = AL_FORMAT_MONO8;
else if(channels == 2)
format = AL_FORMAT_STEREO8;
}
else if(bitsPerSample == 16)
{
if(channels == 1)
format = AL_FORMAT_MONO16;
else if(channels == 2)
format = AL_FORMAT_STEREO16;
}
if(!format) return endWithError("Wrong BitPerSample"); //Not valid format
alBufferData(buffer, format, buf, dataSize, frequency); //Store the sound data in the OpenAL Buffer
soundsbuffer[input] = buffer;
soundssource[input] = source;
soundslist.push_back(input);
if(alGetError() != AL_NO_ERROR) {
return endWithError("Error loading ALBuffer"); //Error during buffer loading
}
fclose(fp);
delete[] buf;
}
int playSound(const char * input) {
//Sound setting variables
ALfloat SourcePos[] = { 0.0, 0.0, 0.0 }; //Position of the source sound
ALfloat SourceVel[] = { 0.0, 0.0, 0.0 }; //Velocity of the source sound
ALfloat ListenerPos[] = { 0.0, 0.0, 0.0 }; //Position of the listener
ALfloat ListenerVel[] = { 0.0, 0.0, 0.0 }; //Velocity of the listener
ALfloat ListenerOri[] = { 0.0, 0.0, -1.0, 0.0, 1.0, 0.0 }; //Orientation of the listener
//First direction vector, then vector pointing up)
//Listener
alListenerfv(AL_POSITION, ListenerPos); //Set position of the listener
alListenerfv(AL_VELOCITY, ListenerVel); //Set velocity of the listener
alListenerfv(AL_ORIENTATION, ListenerOri); //Set orientation of the listener
ALuint source = soundssource[input];
ALuint buffer = soundsbuffer[input];
//Source
alSourcei (source, AL_BUFFER, buffer); //Link the buffer to the source
alSourcef (source, AL_PITCH, 1.0f ); //Set the pitch of the source
alSourcef (source, AL_GAIN, 1.0f ); //Set the gain of the source
alSourcefv(source, AL_POSITION, SourcePos); //Set the position of the source
alSourcefv(source, AL_VELOCITY, SourceVel); //Set the velocity of the source
alSourcei (source, AL_LOOPING, AL_FALSE ); //Set if source is looping sound
//PLAY
alSourcePlay(source); //Play the sound buffer linked to the source
if(alGetError() != AL_NO_ERROR) return endWithError("Error playing sound"); //Error when playing sound
//system("PAUSE"); //Pause to let the sound play
}
void deleteSound() {
//Clean-up
//Close the WAVE file
//Delete the sound data buffer
for(int i = 0; i < soundslist.size(); i++) {
const char * out = soundslist[i];
alDeleteSources(1, &soundssource[out]); //Delete the OpenAL Source
alDeleteBuffers(1, &soundsbuffer[out]);
}
//Delete the OpenAL Buffer
soundslist.clear();
alcMakeContextCurrent(NULL); //Make no context current
alcDestroyContext(context); //Destroy the OpenAL Context
alcCloseDevice(device);
}
So what I'm asking for:
I need either the freealut files, or some help with the code.
Any solutions?
OK, the OpenAL site seems to be partially back online.
For anyone who needs the link: http://connect.creativelabs.com/openal/Downloads/Forms/AllItems.aspx?RootFolder=http%3a%2f%2fconnect%2ecreativelabs%2ecom%2fopenal%2fDownloads%2fALUT&FolderCTID=0x01200073059C4C04B4D14B80686126F6C1A2E8
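Once freealut is installed, the manual RIFF/WAVE parsing above collapses to a couple of calls. This is only a sketch based on the ALUT 1.1 API (alutInit, alutCreateBufferFromFile, alutSleep, alutExit); the filename is just a placeholder, and I haven't built this against your project:
#include <AL/al.h>
#include <AL/alut.h>
int main(int argc, char** argv) {
    alutInit(&argc, argv); // opens a device and creates/activates a context
    // ALUT parses the WAV file and returns a ready-to-use OpenAL buffer.
    ALuint buffer = alutCreateBufferFromFile("sound.wav");
    ALuint source;
    alGenSources(1, &source);
    alSourcei(source, AL_BUFFER, buffer);
    alSourcePlay(source);
    alutSleep(2.0f); // give the sound a moment to play
    alDeleteSources(1, &source);
    alDeleteBuffers(1, &buffer);
    alutExit(); // destroys the context and closes the device
    return 0;
}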