I am trying to write the simplest possible compute shader in DirectX 12 so that I can have a starting point for a real project. However, it seems that no matter what I do, I am unable to get my GPU to process "1+1" and see the output. As there is almost no documentation on compute shaders, I figured my only option now is to ask StackOverflow.
I wrote the following code using the D3D12nBodyGravity project. First I copied over as much of the code verbatim as possible, fixed "small" things, and then, once it was all working, I started trimming the code down to the basics. I am using Visual Studio 2019.
myClass.cpp:
#include "pch.h"
#include "myClass.h"
#include <d3dcompiler.h> // D3DReadFileToBlob
#include "Common\DirectXHelper.h" // NAME_D3D12_OBJECT
#include "Common\Device.h"
#include <iostream>
// InterlockedCompareExchange returns the object's value if the
// comparison fails. If it is already 0, then its value won't
// change and 0 will be returned.
#define InterlockedGetValue(object) InterlockedCompareExchange(object, 0, 0)
myClass::myClass()
: m_frameIndex(0)
, m_UavDescriptorSize(0)
, m_renderContextFenceValue(0)
, m_frameFenceValues{} {
std::cout << "Initializing myClass" << std::endl;
m_FenceValue = 0;
//std::cout << "Calling DXGIDeclareAdapterRemovalSupport()" << std::endl;
//DX::ThrowIfFailed(DXGIDeclareAdapterRemovalSupport());
// Identify the device
std::cout << "Identifying the device" << std::endl;
auto m_device = Device::Get().GetDevice();
std::cout << "Leading the rendering pipeline dependencies" << std::endl;
// Load the rendering pipeline dependencies.
{
std::cout << " Creating the root signatures" << std::endl;
// Create the root signatures.
{
CD3DX12_ROOT_PARAMETER rootParameter;
rootParameter.InitAsUnorderedAccessView(0);
Microsoft::WRL::ComPtr<ID3DBlob> signature;
Microsoft::WRL::ComPtr<ID3DBlob> error;
CD3DX12_ROOT_SIGNATURE_DESC computeRootSignatureDesc(1, &rootParameter, 0, nullptr);
DX::ThrowIfFailed(D3D12SerializeRootSignature(&computeRootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error));
DX::ThrowIfFailed(m_device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&m_computeRootSignature)));
}
// Describe and create the command queue.
std::cout << " Describing and creating the command queue" << std::endl;
D3D12_COMMAND_QUEUE_DESC queueDesc = {};
queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
DX::ThrowIfFailed(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&m_commandQueue)));
NAME_D3D12_OBJECT(m_commandQueue);
std::cout << " Creating descriptor heaps" << std::endl;
// Create descriptor heaps.
{
// Describe and create a shader resource view (SRV) and unordered
// access view (UAV) descriptor heap.
D3D12_DESCRIPTOR_HEAP_DESC UavHeapDesc = {};
UavHeapDesc.NumDescriptors = DescriptorCount;
UavHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
UavHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
DX::ThrowIfFailed(m_device->CreateDescriptorHeap(&UavHeapDesc, IID_PPV_ARGS(&m_UavHeap)));
NAME_D3D12_OBJECT(m_UavHeap);
m_UavDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
std::cout << " Creating a command allocator for each frame" << std::endl;
// Create a command allocator for each frame.
for (UINT n = 0; n < FrameCount; n++) {
DX::ThrowIfFailed(m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocators[n])));
}
} // Load the rendering pipeline dependencies.
std::cout << "Loading the sample assets" << std::endl;
// Load the sample assets.
{
std::cout << " Creating the pipeline states, including compiling and loading shaders" << std::endl;
// Create the pipeline states, which includes compiling and loading shaders.
{
Microsoft::WRL::ComPtr<ID3DBlob> computeShader;
#if defined(_DEBUG)
// Enable better shader debugging with the graphics debugging tools.
UINT compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
#else
UINT compileFlags = 0;
#endif
// Load and compile the compute shader.
DX::ThrowIfFailed(D3DReadFileToBlob(L"ComputeShader.cso", &computeShader));
auto convert_blob_to_byte = [](Microsoft::WRL::ComPtr<ID3DBlob> blob) {
auto* p = reinterpret_cast<unsigned char*>(blob->GetBufferPointer());
auto n = blob->GetBufferSize();
std::vector<unsigned char> buff;
buff.reserve(n);
std::copy(p, p + n, std::back_inserter(buff));
return buff;
};
std::vector<BYTE> m_computeShader = convert_blob_to_byte(computeShader);
// Describe and create the compute pipeline state object (PSO).
D3D12_COMPUTE_PIPELINE_STATE_DESC computePsoDesc = {};
computePsoDesc.pRootSignature = m_computeRootSignature.Get();
computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(computeShader.Get());
DX::ThrowIfFailed(m_device->CreateComputePipelineState(&computePsoDesc, IID_PPV_ARGS(&m_computeState)));
NAME_D3D12_OBJECT(m_computeState);
}
std::cout << " Creating the command list" << std::endl;
// Create the command list.
DX::ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocators[m_frameIndex].Get(), m_computeState.Get(), IID_PPV_ARGS(&m_commandList)));
NAME_D3D12_OBJECT(m_commandList);
std::cout << " Initializing the data in the buffers" << std::endl;
// Initialize the data in the buffers.
{
data.resize(2);
for (unsigned int i = 0; i < data.size(); i++) {
data[i] = 0.0f;
}
const UINT dataSize = data.size() * sizeof(data[0]);
D3D12_HEAP_PROPERTIES defaultHeapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT);
D3D12_HEAP_PROPERTIES uploadHeapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);
D3D12_HEAP_PROPERTIES readbackHeapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK);
D3D12_RESOURCE_DESC bufferDesc = CD3DX12_RESOURCE_DESC::Buffer(dataSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
D3D12_RESOURCE_DESC uploadBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(dataSize);
readbackBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(dataSize);
DX::ThrowIfFailed(m_device->CreateCommittedResource(
&defaultHeapProperties,
D3D12_HEAP_FLAG_NONE,
&bufferDesc,
D3D12_RESOURCE_STATE_COPY_DEST,
nullptr,
IID_PPV_ARGS(&m_dataBuffer)));
m_dataBuffer.Get()->SetName(L"m_dataBuffer");
DX::ThrowIfFailed(m_device->CreateCommittedResource(
&uploadHeapProperties,
D3D12_HEAP_FLAG_NONE,
&uploadBufferDesc,
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr,
IID_PPV_ARGS(&m_dataBufferUpload)));
m_dataBufferUpload.Get()->SetName(L"m_dataBufferUpload");
DX::ThrowIfFailed(m_device->CreateCommittedResource(
&readbackHeapProperties,
D3D12_HEAP_FLAG_NONE,
&readbackBufferDesc,
D3D12_RESOURCE_STATE_COPY_DEST,
nullptr,
IID_PPV_ARGS(&m_dataBufferReadback)));
m_dataBufferReadback.Get()->SetName(L"m_dataBufferReadback");
NAME_D3D12_OBJECT(m_dataBuffer);
dataSubResource = {};
dataSubResource.pData = &data[0];
dataSubResource.RowPitch = dataSize;
dataSubResource.SlicePitch = dataSubResource.RowPitch;
UpdateSubresources<1>(m_commandList.Get(), m_dataBuffer.Get(), m_dataBufferUpload.Get(), 0, 0, 1, &dataSubResource);
m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_dataBuffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COMMON));
m_commandList->CopyResource(m_dataBufferReadback.Get(), m_dataBufferUpload.Get());
D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
uavDesc.Format = DXGI_FORMAT_UNKNOWN;
uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
uavDesc.Buffer.FirstElement = 0;
uavDesc.Buffer.NumElements = 1;
uavDesc.Buffer.StructureByteStride = sizeof(data[0]);
uavDesc.Buffer.CounterOffsetInBytes = 0;
uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
CD3DX12_CPU_DESCRIPTOR_HANDLE uavHandle0(m_UavHeap->GetCPUDescriptorHandleForHeapStart(), Uav, m_UavDescriptorSize);
m_device->CreateUnorderedAccessView(m_dataBuffer.Get(), nullptr, &uavDesc, uavHandle0);
} // Initialize the data in the buffers.
std::cout << " Closing the command list and executing it to begind the initial GPU setup" << std::endl;
// Close the command list and execute it to begin the initial GPU setup.
DX::ThrowIfFailed(m_commandList->Close());
ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() };
m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
std::cout << " Creating synchronization objects and wait until assets have been uploaded to the GPU" << std::endl;
// Create synchronization objects and wait until assets have been uploaded to the GPU.
{
DX::ThrowIfFailed(m_device->CreateFence(m_renderContextFenceValue, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_renderContextFence)));
m_renderContextFenceValue++;
m_renderContextFenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
if (m_renderContextFenceEvent == nullptr) {
DX::ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()));
}
// Add a signal command to the queue.
DX::ThrowIfFailed(m_commandQueue->Signal(m_renderContextFence.Get(), m_renderContextFenceValue));
// Instruct the fence to set the event object when the signal command completes.
DX::ThrowIfFailed(m_renderContextFence->SetEventOnCompletion(m_renderContextFenceValue, m_renderContextFenceEvent));
m_renderContextFenceValue++;
// Wait until the signal command has been processed.
WaitForSingleObject(m_renderContextFenceEvent, INFINITE);
}
} // Load the sample assets.
std::cout << "Creating compute resources" << std::endl;
{
// Create compute resources.
D3D12_COMMAND_QUEUE_DESC queueDesc = { D3D12_COMMAND_LIST_TYPE_COMPUTE, 0, D3D12_COMMAND_QUEUE_FLAG_NONE };
DX::ThrowIfFailed(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&m_computeCommandQueue)));
DX::ThrowIfFailed(m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE, IID_PPV_ARGS(&m_computeAllocator)));
DX::ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE, m_computeAllocator.Get(), nullptr, IID_PPV_ARGS(&m_computeCommandList)));
DX::ThrowIfFailed(m_device->CreateFence(0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&m_Fence)));
m_FenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
if (m_FenceEvent == nullptr) {
DX::ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()));
}
}
std::cout << "Calculating" << std::endl;
Calculate();
std::cout << "Finished" << std::endl;
}
void myClass::Calculate() {
m_computeCommandList.Get()->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_dataBuffer.Get(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_UNORDERED_ACCESS));
m_computeCommandList.Get()->SetPipelineState(m_computeState.Get());
m_computeCommandList.Get()->SetComputeRootSignature(m_computeRootSignature.Get());
ID3D12DescriptorHeap* ppHeaps[] = { m_UavHeap.Get() };
m_computeCommandList.Get()->SetDescriptorHeaps(_countof(ppHeaps), ppHeaps);
CD3DX12_GPU_DESCRIPTOR_HANDLE uavHandle(m_UavHeap->GetGPUDescriptorHandleForHeapStart(), Uav, m_UavDescriptorSize);
m_computeCommandList.Get()->SetComputeRootUnorderedAccessView(ComputeRootUAVTable, m_dataBuffer->GetGPUVirtualAddress());
m_computeCommandList.Get()->Dispatch(1, 1, 1);
m_computeCommandList.Get()->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_dataBuffer.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COMMON));
// Close and execute the command list.
DX::ThrowIfFailed(m_computeCommandList.Get()->Close());
ID3D12CommandList* commandLists[] = { m_computeCommandList.Get() };
m_computeCommandQueue->ExecuteCommandLists(1, commandLists);
// Wait for the compute shader to complete the calculation.
UINT64 FenceValue = InterlockedIncrement(&m_FenceValue);
DX::ThrowIfFailed(m_computeCommandQueue.Get()->Signal(m_Fence.Get(), FenceValue));
DX::ThrowIfFailed(m_Fence.Get()->SetEventOnCompletion(FenceValue, m_FenceEvent));
WaitForSingleObject(m_FenceEvent, INFINITE);
std::cout << "FenceValue = " << FenceValue << " " << m_FenceValue << " " << m_Fence.Get()->GetCompletedValue() << std::endl;
// Check the output!
float* dataptr = nullptr;
D3D12_RANGE range = { 0, readbackBufferDesc.Width };
DX::ThrowIfFailed(m_dataBufferReadback->Map(0, &range, (void**)&dataptr));
for (int i = 0; i < readbackBufferDesc.Width / sizeof(data[0]); i++)
printf("uav[%d] = %.2f\n", i, dataptr[i]);
m_dataBufferReadback->Unmap(0, nullptr);
for (unsigned int i = 0; i < data.size(); i++) {
std::cout << "data[" << i << "] = " << data[i] << std::endl;
}
}
myClass.h:
#pragma once
#include "Common\Device.h"
#include <iostream>
// We have to write all of this as its own class, otherwise we cannot
// use the "this" pointer when we create compute resources. We need to
// do that because this code targets multithreading.
class myClass {
public:
myClass();
private:
// Two buffers full of data are used. The compute thread alternates
// writing to each of them. The render thread renders using the
// buffer that is not currently in use by the compute shader.
//struct Data {
// float c;
//};
//std::vector<Data> data;
std::vector<float> data;
// For the compute pipeline, the CBV is a struct containing some
// constants used in the compute shader.
struct ConstantBufferCS {
float a;
float b;
};
D3D12_SUBRESOURCE_DATA dataSubResource;
static const UINT FrameCount = 1;
//static const UINT ThreadCount = 1;
UINT m_heightInstances;
UINT m_widthInstances;
UINT m_frameIndex;
Microsoft::WRL::ComPtr<ID3D12RootSignature> m_rootSignature;
Microsoft::WRL::ComPtr<ID3D12RootSignature> m_computeRootSignature;
Microsoft::WRL::ComPtr<ID3D12CommandQueue> m_commandQueue;
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> m_UavHeap;
Microsoft::WRL::ComPtr<ID3D12CommandAllocator> m_commandAllocators[FrameCount];
Microsoft::WRL::ComPtr<ID3D12PipelineState> m_computeState;
Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> m_commandList;
Microsoft::WRL::ComPtr<ID3D12Resource> m_constantBufferCS;
UINT64 m_renderContextFenceValue;
HANDLE m_renderContextFenceEvent;
UINT64 m_frameFenceValues[FrameCount];
UINT m_UavDescriptorSize;
ConstantBufferCS constantBufferCS;
Microsoft::WRL::ComPtr<ID3D12Resource> constantBufferCSUpload;
Microsoft::WRL::ComPtr<ID3D12Fence> m_renderContextFence;
Microsoft::WRL::ComPtr<ID3D12Resource> m_dataBuffer;
Microsoft::WRL::ComPtr<ID3D12Resource> m_dataBufferUpload;
Microsoft::WRL::ComPtr<ID3D12Resource> m_dataBufferReadback;
// Compute objects.
Microsoft::WRL::ComPtr<ID3D12CommandAllocator> m_computeAllocator;
Microsoft::WRL::ComPtr<ID3D12CommandQueue> m_computeCommandQueue;
Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> m_computeCommandList;
Microsoft::WRL::ComPtr<ID3D12Fence> m_Fence;
volatile HANDLE m_FenceEvent;
D3D12_RESOURCE_DESC readbackBufferDesc;
// State
UINT64 volatile m_FenceValue;
/*
struct ThreadData {
myClass* pContext;
UINT threadIndex;
};
ThreadData m_threadData;
HANDLE m_threadHandles;
*/
void Calculate();
// Indices of shader resources in the descriptor heap.
enum DescriptorHeapIndex : UINT32 {
Uav = 0,
DescriptorCount = 1
};
enum ComputeRootParameters : UINT32 {
//ComputeRootCBV = 0,
ComputeRootUAVTable = 0,
ComputeRootParametersCount
};
};
Device.cpp:
#pragma once
#include "pch.h"
#include "Device.h"
#include "DirectXHelper.h"
#include <cassert> // for "assert"
#include <iostream>
static Device* gs_pSingelton = nullptr;
// Constructor
Device::Device(HINSTANCE hInst, bool useWarp)
: m_hInstance(hInst)
, m_useWarp(useWarp)
{
}
void Device::Initialize() {
#if defined(_DEBUG)
// Always enable the debug layer before doing anything DX12 related
// so all possible errors generated while creating DX12 objects
// are caught by the debug layer.
Microsoft::WRL::ComPtr<ID3D12Debug1> debugInterface;
DX::ThrowIfFailed(D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)));
debugInterface->EnableDebugLayer();
// Enable these if you want full validation (will slow down rendering a lot).
//debugInterface->SetEnableGPUBasedValidation(TRUE);
//debugInterface->SetEnableSynchronizedCommandQueueValidation(TRUE);
#endif
auto dxgiAdapter = GetAdapter(false);
if (!dxgiAdapter) { // If no supporting DX12 adapters exist, fall back to WARP
dxgiAdapter = GetAdapter(true);
}
if (dxgiAdapter) {
m_device = CreateDevice(dxgiAdapter);
}
else {
throw std::exception("DXGI adapter enumeration failed.");
}
}
void Device::Create(HINSTANCE hInst) {
if (!gs_pSingelton) {
gs_pSingelton = new Device(hInst);
gs_pSingelton->Initialize();
}
}
Device& Device::Get() {
assert(gs_pSingelton);
return *gs_pSingelton;
}
void Device::Destroy() {
if (gs_pSingelton) {
delete gs_pSingelton;
gs_pSingelton = nullptr;
}
}
// Destructor
Device::~Device() {
}
Microsoft::WRL::ComPtr<ID3D12Device2> Device::CreateDevice(Microsoft::WRL::ComPtr<IDXGIAdapter4> adapter) {
Microsoft::WRL::ComPtr<ID3D12Device2> d3d12Device2;
DX::ThrowIfFailed(D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&d3d12Device2)));
// Enable debug messages in debug mode.
#if defined(_DEBUG)
Microsoft::WRL::ComPtr<ID3D12InfoQueue> pInfoQueue;
if (SUCCEEDED(d3d12Device2.As(&pInfoQueue))) {
pInfoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE);
pInfoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE);
pInfoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE);
// Suppress whole categories of messages
//D3D12_MESSAGE_CATEGORY Categories[] = {};
// Suppress messages based on their severity level
D3D12_MESSAGE_SEVERITY Severities[] = { D3D12_MESSAGE_SEVERITY_INFO };
// Suppress individual messages by their ID
D3D12_MESSAGE_ID DenyIds[] = {
D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, // I'm really not sure how to avoid this message.
D3D12_MESSAGE_ID_MAP_INVALID_NULLRANGE, // This warning occurs when using capture frame while graphics debugging.
D3D12_MESSAGE_ID_UNMAP_INVALID_NULLRANGE, // This warning occurs when using capture frame while graphics debugging.
};
D3D12_INFO_QUEUE_FILTER NewFilter = {};
//NewFilter.DenyList.NumCategories = _countof(Categories);
//NewFilter.DenyList.pCategoryList = Categories;
NewFilter.DenyList.NumSeverities = _countof(Severities);
NewFilter.DenyList.pSeverityList = Severities;
NewFilter.DenyList.NumIDs = _countof(DenyIds);
NewFilter.DenyList.pIDList = DenyIds;
DX::ThrowIfFailed(pInfoQueue->PushStorageFilter(&NewFilter));
}
#endif
return d3d12Device2;
}
Microsoft::WRL::ComPtr<IDXGIAdapter4> Device::GetAdapter(bool useWarp) {
UINT createFactoryFlags = 0;
#if defined(_DEBUG)
createFactoryFlags = DXGI_CREATE_FACTORY_DEBUG;
#endif
DX::ThrowIfFailed(CreateDXGIFactory2(createFactoryFlags, IID_PPV_ARGS(&m_factory)));
Microsoft::WRL::ComPtr<IDXGIAdapter1> dxgiAdapter1;
Microsoft::WRL::ComPtr<IDXGIAdapter4> dxgiAdapter4;
if (useWarp) {
DX::ThrowIfFailed(m_factory->EnumWarpAdapter(IID_PPV_ARGS(&dxgiAdapter1)));
DX::ThrowIfFailed(dxgiAdapter1.As(&dxgiAdapter4));
}
else {
SIZE_T maxDedicatedVideoMemory = 0;
for (UINT i = 0; m_factory->EnumAdapters1(i, &dxgiAdapter1) != DXGI_ERROR_NOT_FOUND; ++i) {
DXGI_ADAPTER_DESC1 dxgiAdapterDesc1;
dxgiAdapter1->GetDesc1(&dxgiAdapterDesc1);
// Check to see if the adapter can create a D3D12 device without actually
// creating it. The adapter with the largest dedicated video memory
// is favored.
if ((dxgiAdapterDesc1.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) == 0 &&
SUCCEEDED(D3D12CreateDevice(dxgiAdapter1.Get(),
D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device), nullptr)) &&
dxgiAdapterDesc1.DedicatedVideoMemory > maxDedicatedVideoMemory) {
maxDedicatedVideoMemory = dxgiAdapterDesc1.DedicatedVideoMemory;
DX::ThrowIfFailed(dxgiAdapter1.As(&dxgiAdapter4));
}
}
}
return dxgiAdapter4;
}
Device.h:
#pragma once
#include <dxgi1_6.h> // IDXGIAdapter4
// We require this file because we are unable to pass the device pointer everywhere we need it.
class Device {
public:
/**
* Create the device singleton with the device instance handle.
*/
static void Create(HINSTANCE hInst);
/**
* Destroy the device instance.
*/
static void Destroy();
/**
* Get the device singleton.
*/
static Device& Get();
/**
* Get the Direct3D 12 device
*/
Microsoft::WRL::ComPtr<ID3D12Device2> GetDevice() const { return m_device; }
Microsoft::WRL::ComPtr<IDXGIFactory4> GetFactory() const { return m_factory; }
protected:
// Create a device instance
Device(HINSTANCE hInst, bool useWarp = false);
// Destroy the device instance.
virtual ~Device();
// Initialize the device instance.
void Initialize();
Microsoft::WRL::ComPtr<IDXGIAdapter4> GetAdapter(bool useWarp);
Microsoft::WRL::ComPtr<ID3D12Device2> CreateDevice(Microsoft::WRL::ComPtr<IDXGIAdapter4> adapter);
private:
Device(const Device& copy) = delete;
Device& operator=(const Device& other) = delete;
HINSTANCE m_hInstance;
Microsoft::WRL::ComPtr<ID3D12Device2> m_device;
Microsoft::WRL::ComPtr<IDXGIFactory4> m_factory;
bool m_useWarp;
};
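For reference, here is roughly how everything above gets wired together at startup (a minimal sketch, assuming a standard Win32 entry point; the real entry point does more than this):
#include "Common\Device.h"
#include "myClass.h"
INT WINAPI WinMain(HINSTANCE hInst, HINSTANCE, LPSTR, INT)
{
    Device::Create(hInst); // create the device singleton once
    {
        myClass compute; // grabs the shared device via Device::Get().GetDevice()
    }
    Device::Destroy(); // tear the singleton down on exit
    return 0;
}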
ComputeShader.hlsl:
RWStructuredBuffer<float> output : register(u0); // UAV
[numthreads(1, 1, 1)]
void main( uint3 DTid : SV_DispatchThreadID ) {
output[DTid.x] = 1 + 1;
}
Please let me know if you can spot what I am misunderstanding. I can also try uploading my project to GitHub if that helps... SOS :(
How do I take images with the Sapera SDK and transfer the image data from the SapBuffer object to a vector?
To handle images taken by a camera using Sapera, you should make a specialization of the SapProcessing class, which is used to process buffers. Otherwise the buffer is cleared automatically after each frame, and you lose the data.
The imaging process goes as follows:
You call Grab() on the camera object to start imaging.
After each frame has been taken, the transfer callback is called. Here you request your SapProcessing object to process the next frame.
The Run() function of your SapProcessing object is called. Here you can read data from the buffer.
After the Run() function, the processing callback is called.
When you have received enough frames, call Freeze() to stop imaging.
This example code takes images using the default settings on the camera (monochrome 8-bit pixel format).
#include <string>
#include <vector>
#include <memory>
#include <stdexcept>
#include <iostream>
#include <iomanip>
#include <atomic>
#include "SapClassBasic.h"
// Helper function to find the camera by its serial number
SapAcqDevice getDeviceBySN(const std::string& sn)
{
char serverName[CORSERVER_MAX_STRLEN];
char serialNumberName[2048];
const int serverCount = SapManager::GetServerCount();
for (int i = 0; i < serverCount; i++) {
if (SapManager::GetResourceCount(i, SapManager::ResourceAcqDevice) != 0)
{
SapManager::GetServerName(i, serverName, sizeof(serverName));
SapAcqDevice camera(serverName);
if (!camera.Create()) {
throw std::runtime_error("Failed to create camera object.");
}
int featureCount;
if (camera.GetFeatureCount(&featureCount) && featureCount > 0)
{
if (camera.GetFeatureValue("DeviceID", serialNumberName, sizeof(serialNumberName))
&& serialNumberName == sn)
{
return camera;
}
}
camera.Destroy();
}
}
const auto errorStr = "Camera \"" + sn + "\" was not found.";
throw std::runtime_error(errorStr.c_str());
}
class SapMyProcessing : public SapProcessing
{
public:
SapMyProcessing(SapBuffer* pBuffers, SapProCallback pCallback, void* pContext);
virtual ~SapMyProcessing();
protected:
virtual BOOL Run();
};
SapMyProcessing::SapMyProcessing(SapBuffer* pBuffers, SapProCallback pCallback, void* pContext)
: SapProcessing(pBuffers, pCallback, pContext)
{}
SapMyProcessing::~SapMyProcessing()
{
if (m_bInitOK) Destroy();
}
BOOL SapMyProcessing::Run()
{
// Get the current buffer index
const int proIndex = GetIndex();
// If this is not true, the buffer has overflowed
SapBuffer::State state;
bool goodContent = m_pBuffers->GetState(proIndex, &state)
&& state == SapBuffer::StateFull;
if (goodContent) {
void *inAddress = nullptr;
m_pBuffers->GetAddress(proIndex, &inAddress);
int inSize = 0;
m_pBuffers->GetSpaceUsed(proIndex, &inSize);
// Width, height and pixel format are received from the camera
const int width = m_pBuffers->GetWidth();
const int height = m_pBuffers->GetHeight();
const auto format = m_pBuffers->GetFormat();
const int outSize = width * height;
// Skip unexpected pixel format or incomplete frame
goodContent = format == SapFormatMono8
&& inSize == outSize;
if (goodContent) {
// Copy data to vector
std::vector<uint8_t> outBuffer(outSize);
std::copy((uint8_t*)inAddress, (uint8_t*)(inAddress) + outSize, outBuffer.begin());
// Print the first line
for (int i = 0; i < width; i++) {
std::cout << std::hex << int(outBuffer[i]);
}
std::cout << std::endl << std::endl;
}
}
return TRUE;
}
// Information to pass to callbacks
struct TransferContext {
std::atomic_int frameGrabCount = 0, frameProcessingCount = 0;
std::shared_ptr<SapMyProcessing> processing;
};
void transferCallback(SapXferCallbackInfo *info)
{
auto context = (TransferContext*)info->GetContext();
context->frameGrabCount++;
if (!info->IsTrash()) {
// Execute Run() for this frame
context->processing->ExecuteNext();
}
}
// Processing callback is called after Run()
void processingCallback(SapProCallbackInfo* info)
{
auto context = (TransferContext*)info->GetContext();
// Processing has finished
context->frameProcessingCount++;
}
// The main imaging function
void grab(const std::string& serialNumber)
{
// Number of frames to receive from the camera
const int maxFrameCount = 10;
TransferContext context;
auto camera = getDeviceBySN(serialNumber);
std::unique_ptr<SapBuffer> buffer
= std::make_unique<SapBufferWithTrash>(maxFrameCount, &camera);
std::unique_ptr<SapTransfer> transfer
= std::make_unique<SapAcqDeviceToBuf>(&camera, buffer.get(), transferCallback, &context);
context.processing = std::make_shared<SapMyProcessing>(buffer.get(), processingCallback, &context);
auto cleanup = [&]() {
if (context.processing) context.processing->Destroy();
if (transfer) transfer->Destroy();
if (buffer) buffer->Destroy();
camera.Destroy();
};
try {
if (!buffer->Create()) {
throw std::runtime_error("Failed to create buffer object.");
}
if (!transfer->Create()) {
throw std::runtime_error("Failed to create transfer object.");
}
if (!context.processing->Create()) {
throw std::runtime_error("Failed to create processing object.");
}
transfer->SetAutoEmpty(false);
context.processing->SetAutoEmpty(true);
context.processing->Init();
transfer->Grab();
// Wait for the camera to grab all frames
while (context.frameGrabCount < maxFrameCount);
transfer->Freeze();
if (!transfer->Wait(5000)) {
throw std::runtime_error("Failed to stop grab.");
}
// Wait for processing to complete
while (context.frameProcessingCount < maxFrameCount);
cleanup();
}
catch (...) {
cleanup();
throw;
}
}
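A minimal usage sketch (the serial number below is hypothetical; pass your camera's actual DeviceID):
int main()
{
    try {
        grab("S1068451"); // hypothetical serial number
        return 0;
    }
    catch (const std::exception& e) {
        std::cerr << e.what() << std::endl;
        return 1;
    }
}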
Windows 10, amd64.
Built TensorFlow GPU-enabled C++ static libraries with the CMake GUI + MSBuild.
Build successful.
label_image tutorial example execution times:
main.cc execution: 9.17 secs
label_image.py execution (tensorflow): 10.34 secs
label_image.py execution (tensorflow-gpu): 1.62 secs
Any idea why? Thanks a lot.
main.cc with minor customizations:
#define NOMINMAX
#include <fstream>
#include <utility>
#include <vector>
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/image_ops.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/graph/default_device.h"
#include "tensorflow/core/graph/graph_def_builder.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/util/command_line_flags.h"
// These are all common classes it's handy to reference with no namespace.
using tensorflow::Flag;
using tensorflow::Tensor;
using tensorflow::Status;
using tensorflow::string;
using tensorflow::int32;
static Status ReadEntireFile(tensorflow::Env* env, const string& filename, Tensor* output) {
tensorflow::uint64 file_size = 0;
TF_RETURN_IF_ERROR(env->GetFileSize(filename, &file_size));
string contents;
contents.resize(file_size);
std::unique_ptr<tensorflow::RandomAccessFile> file;
TF_RETURN_IF_ERROR(env->NewRandomAccessFile(filename, &file));
tensorflow::StringPiece data;
TF_RETURN_IF_ERROR(file->Read(0, file_size, &data, &(contents)[0]));
if (data.size() != file_size) {
return tensorflow::errors::DataLoss("Truncated read of '", filename, "' expected ", file_size, " got ", data.size());
}
output->scalar<string>()() = data.ToString();
return Status::OK();
}
// Given an image file name, read in the data, try to decode it as an image,
// resize it to the requested size, and then scale the values as desired.
Status ReadTensorFromImageFile(const string file_name, const int input_height, const int input_width, const float input_mean, const float input_std, std::vector<Tensor>* out_tensors) {
auto root = tensorflow::Scope::NewRootScope();
using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
string input_name = "file_reader";
string output_name = "dim";
// read file_name into a tensor named input
Tensor input(tensorflow::DT_STRING, tensorflow::TensorShape());
TF_RETURN_IF_ERROR(ReadEntireFile(tensorflow::Env::Default(), file_name, &input));
// use a placeholder to read input data
auto file_reader = Placeholder(root.WithOpName("input"), tensorflow::DataType::DT_STRING);
std::vector<std::pair<string, tensorflow::Tensor>> inputs = { { "input", input }, };
// Now try to figure out what kind of file it is and decode it.
const int wanted_channels = 3;
tensorflow::Output image_reader;
if (tensorflow::StringPiece(file_name).ends_with(".png")) {
image_reader = DecodePng(root.WithOpName("png_reader"), file_reader, DecodePng::Channels(wanted_channels));
}
else if (tensorflow::StringPiece(file_name).ends_with(".gif")) {
// gif decoder returns 4-D tensor, remove the first dim
image_reader = Squeeze(root.WithOpName("squeeze_first_dim"), DecodeGif(root.WithOpName("gif_reader"), file_reader));
}
else if (tensorflow::StringPiece(file_name).ends_with(".bmp")) {
image_reader = DecodeBmp(root.WithOpName("bmp_reader"), file_reader);
}
else {
// Assume if it's neither a PNG nor a GIF then it must be a JPEG.
image_reader = DecodeJpeg(root.WithOpName("jpeg_reader"), file_reader, DecodeJpeg::Channels(wanted_channels));
}
// Now cast the image data to float so we can do normal math on it.
auto uint8_caster = Cast(root.WithOpName("uint8_caster"), image_reader, tensorflow::DT_UINT8);
// The convention for image ops in TensorFlow is that all images are expected
// to be in batches, so that they're four-dimensional arrays with indices of
// [batch, height, width, channel]. Because we only have a single image, we
// have to add a batch dimension of 1 to the start with ExpandDims().
auto dims_expander = ExpandDims(root.WithOpName(output_name), uint8_caster, 0);
// Bilinearly resize the image to fit the required dimensions.
//auto resized = ResizeBilinear(root, dims_expander,Const(root.WithOpName("size"), { input_height, input_width }));
// Subtract the mean and divide by the scale.
//Div(root.WithOpName(output_name), Sub(root, resized, { input_mean }),{ input_std });
// This runs the GraphDef network definition that we've just constructed, and
// returns the results in the output tensor.
tensorflow::GraphDef graph;
TF_RETURN_IF_ERROR(root.ToGraphDef(&graph));
tensorflow::SessionOptions options;
std::unique_ptr<tensorflow::Session> session(tensorflow::NewSession(options));
TF_RETURN_IF_ERROR(session->Create(graph));
TF_RETURN_IF_ERROR(session->Run({ inputs }, { output_name }, {}, out_tensors));
return Status::OK();
}
// Reads a model graph definition from disk, and creates a session object you
// can use to run it.
Status LoadGraph(const string& graph_file_name, std::unique_ptr<tensorflow::Session>* session) {
tensorflow::GraphDef graph_def;
Status load_graph_status = ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def);
if (!load_graph_status.ok()) {return tensorflow::errors::NotFound("Failed to load compute graph at '",graph_file_name, "'");}
tensorflow::SessionOptions options;
session->reset(tensorflow::NewSession(options));
Status session_create_status = (*session)->Create(graph_def);
if (!session_create_status.ok()) {return session_create_status; }
return Status::OK();
}
int main(int argc, char* argv[]) {
// These are the command-line flags the program can understand.
// They define where the graph and input data is located, and what kind of
// input the model expects. If you train your own model, or use something
// other than inception_v3, then you'll need to update these.
string image = "tensorflow/examples/label_image/data/grace_hopper.jpg";
string graph = "tensorflow/examples/label_image/data/faster_rcnn_resnet101_coco_11_06_2017/frozen_inference_graph.pb";
string labels = "/tensorflow/tensorflow/examples/label_image/data/faster_rcnn_resnet101_coco_11_06_2017/graph.pbtxt";
int32 input_width = 299;
int32 input_height = 299;
float input_mean = 0;
float input_std = 255;
string input_layer = "image_tensor:0";
std::vector<string> output_layer = { "detection_boxes:0", "detection_scores:0", "detection_classes:0", "num_detections:0" };
string o_layer = "detection_boxes:0, detection_scores : 0, detection_classes : 0, num_detections : 0"; //dummy for Flag structure
bool self_test = false;
string root_dir = "/tensorflow/";
std::vector<Flag> flag_list = {
Flag("image", &image, "image to be processed"),
Flag("graph", &graph, "graph to be executed"),
Flag("labels", &labels, "name of file containing labels"),
Flag("input_width", &input_width, "resize image to this width in pixels"),
Flag("input_height", &input_height,
"resize image to this height in pixels"),
Flag("input_mean", &input_mean, "scale pixel values to this mean"),
Flag("input_std", &input_std, "scale pixel values to this std deviation"),
Flag("input_layer", &input_layer, "name of input layer"),
Flag("output_layer", &o_layer, "name of output layer"),
Flag("self_test", &self_test, "run a self test"),
Flag("root_dir", &root_dir,
"interpret image and graph file names relative to this directory"),
};
string usage = tensorflow::Flags::Usage(argv[0], flag_list);
const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
if (!parse_result) {
LOG(ERROR) << usage;
return -1;
}
// We need to call this to set up global state for TensorFlow.
tensorflow::port::InitMain(argv[0], &argc, &argv);
if (argc > 1) {
LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage;
return -1;
}
// First we load and initialize the model.
std::unique_ptr<tensorflow::Session> session;
string graph_path = tensorflow::io::JoinPath(root_dir, graph);
Status load_graph_status = LoadGraph(graph_path, &session);
if (!load_graph_status.ok()) {
LOG(ERROR) << load_graph_status;
return -1;
}
// Get the image from disk as a float array of numbers, resized and normalized
// to the specifications the main graph expects.
std::vector<Tensor> resized_tensors;
string image_path = tensorflow::io::JoinPath(root_dir, image);
//-------------------------------------
LOG(ERROR) << "Detection Basla....";
Status read_tensor_status = ReadTensorFromImageFile(image_path, input_height, input_width, input_mean, input_std, &resized_tensors);
if (!read_tensor_status.ok()) {
LOG(ERROR) << read_tensor_status;
return -1;
}
const Tensor resized_tensor = resized_tensors[0];
// Actually run the image through the model.
std::vector<Tensor> outputs;
Status run_status = session->Run({ { input_layer, resized_tensor } }, { output_layer }, {}, &outputs);
LOG(ERROR) << "Detection Bit......";
//-----------------------------------------
if (!run_status.ok()) {
LOG(ERROR) << "Running model failed: " << run_status;
return -1;
}
tensorflow::TTypes<float>::Flat scores = outputs[1].flat<float>();
tensorflow::TTypes<float>::Flat classes = outputs[2].flat<float>();
tensorflow::TTypes<float>::Flat num_detections = outputs[3].flat<float>();
auto boxes = outputs[0].flat_outer_dims<float, 3>();
LOG(ERROR) << "num_detections:" << num_detections(0) << "," << outputs[0].shape().DebugString();
for (size_t i = 0; i < num_detections(0) && i < 20; ++i)
{
if (scores(i) > 0.5)
{
LOG(ERROR) << i << ",score:" << scores(i) << ",class:" << classes(i) << ",box:" << "," << boxes(0, i, 0) << "," << boxes(0, i, 1) << "," << boxes(0, i, 2) << "," << boxes(0, i, 3);
}
}
return 0;
}
After a successful build I ran the code and got a "_pywrap_tensorflow_internal.pyd not found" message.
I searched the PC and found one in the python/tensorflow path.
I copied that one to the execution path and everything was OK except GPU usage.
Suddenly something whispered to me:
"Hey, you immortal!! You should take the recently generated
pywrap_tensorflow_internal.dll, rename it _pywrap_tensorflow_internal.pyd,
and copy it to the execution path."
Now the GPU is being used.
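If you want to confirm which device the ops actually land on, device-placement logging can be enabled through the standard ConfigProto option (a minimal sketch; only the SessionOptions lines in LoadGraph change):
tensorflow::SessionOptions options;
// Log every op's device assignment (CPU vs /device:GPU:0) when the session runs.
options.config.set_log_device_placement(true);
session->reset(tensorflow::NewSession(options));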
How can I pass a variable from one C++ program to another?
The variable that I need to pass is a string.
This is the C++ file that has to receive the string:
#include <string>
#include <iostream>
#include <ros/ros.h>
#include <json_prolog/prolog.h>
using namespace std;
using namespace json_prolog;
int main(int argc, char *argv[])
{
ros::init(argc, argv, "Info");
Prolog pl;
int c=0;
do
{
int i=0;
std::string M;
cout<<"Declare the name of the class of interest"<< "\t";
cin>>M;
if (M=="knife")
.........
In this program I decide what string M is, but I want this M to come from the output of another C++ program, which obviously has to produce a string as output.
This is the C++ program that has to send me the string:
#include<aruco_marker_detector/arucoMarkerDetector.h>
namespace MarkerDetector {
void FilterButter(Vector3d &s, Vector3d &sf, Vector3d &bButter, Vector3d &aButter)
{
double r,rf;
r=bButter.transpose()*s;
rf=aButter.transpose()*sf;
sf(0)=r-rf;
s(2)=s(1);
s(1)=s(0);
sf(2)=sf(1);
sf(1)=sf(0);
}
MarkerTracker::MarkerTracker(ros::NodeHandle &n)
{
this->nh = n; // after this instruction, the address containing the id of node n is saved in nh
this->it = new image_transport::ImageTransport(this->nh); // saves in it the address of a new image_transport::ImageTransport built on this->nh
// ros::Duration r(1);
XmlRpc::XmlRpcValue my_list;
nh.getParam("marker_ids", my_list);
for (int32_t i = 0; i < my_list.size(); ++i)
{
this->markerIDs.push_back(-1);
//ROS_ASSERT(my_list[i].getType() == XmlRpc::XmlRpcValue::TypeDouble);
this->markerIDs[i]=static_cast<int>(my_list[i]);
//ROS_ERROR_STREAM("markerIDs["<<i<<"]: "<<this->markerIDs[i]);
}
//r.sleep();
nh.getParam("marker_labels", my_list);
for (int32_t i = 0; i < my_list.size(); ++i)
{
this->markerLables.push_back("NotSet");
//ROS_ASSERT(my_list[i].getType() == XmlRpc::XmlRpcValue::TypeDouble);
this->markerLables[i]=static_cast<std::string>(my_list[i]);
//ROS_ERROR_STREAM("markerLables["<<i<<"]: "<<this->markerLables[i]);
}
//r.sleep();
this->markerTrackerID=-1;
//
//Load Parameters (rosparameters)
nh.getParam("marker_tracker_id",this->markerTrackerID);
//nh.getParam("marker_id",this->markerID);
nh.getParam("camera_info_url",this->cameraParametersFile);
nh.getParam("marker_size",this->markerSize);
nh.getParam("block_size",this->thresParam1);
nh.getParam("sub_constant",this->thresParam2);
nh.getParam("camera_reference_frame",this->cameraReferenceFrame);
nh.getParam("filter_coefficient_B", my_list);
for (int32_t i = 0; i < my_list.size(); ++i)
{
//ROS_ASSERT(my_list[i].getType() == XmlRpc::XmlRpcValue::TypeDouble);
this->B(i)=static_cast<double>(my_list[i]);
}
nh.getParam("filter_coefficient_A", my_list);
for (int32_t i = 0; i < my_list.size(); ++i)
{
//ROS_ASSERT(my_list[i].getType() == XmlRpc::XmlRpcValue::TypeDouble);
this->A(i)=static_cast<double>(my_list[i]);
}
nh.getParam("image_topic_name_raw",this->imageTopicRaw);
nh.getParam("image_topic_name_proc",this->imageTopicProcessed);
nh.getParam("camera_name_tag",this->cameraNameTag);
nh.getParam("broadcast_tf_flag",this->broadcastTF);
nh.getParam("camera_extrinsics",my_list);
VectorXd in(16);
this->TC_torso.Identity();
for (int32_t i = 0; i < my_list.size(); ++i)
{
in(i)=static_cast<double>(my_list[i]);
}
ROS_WARN_STREAM("in: \n"<<in.transpose());
// r.sleep();
// this->TC_torso.matrix()(0,0)=in(0*4+0);
// this->TC_torso.matrix()(0,1)=in(0*4+1);
// this->TC_torso.matrix()(0,2)=in(0*4+2);
// this->TC_torso.matrix()(0,3)=in(0*4+3);
// this->TC_torso.matrix()(1,0)=in(1*4+0);
// this->TC_torso.matrix()(1,1)=in(1*4+1);
// this->TC_torso.matrix()(1,2)=in(1*4+2);
// this->TC_torso.matrix()(1,3)=in(1*4+3);
// this->TC_torso.matrix()(2,0)=in(2*4+0);
// this->TC_torso.matrix()(2,1)=in(2*4+1);
// this->TC_torso.matrix()(2,2)=in(2*4+2);
// this->TC_torso.matrix()(2,3)=in(2*4+3);
// this->TC_torso.matrix()(3,0)=in(3*4+0);
// this->TC_torso.matrix()(3,1)=in(3*4+1);
// this->TC_torso.matrix()(3,2)=in(3*4+2);
// this->TC_torso.matrix()(3,3)=in(3*4+3);
for(unsigned int i=0;i<4;i++)
{
for(unsigned int j=0;j<4;j++)
{
this->TC_torso.matrix()(i,j)=in(i*4+j);
}
}
// this->TC_torso=Tmp;
// Tmp.matrix()<<in;
//
// this->TC_torso=Tmp.matrix().transpose();
ROS_WARN_STREAM("TC_torso: \n"<<TC_torso.matrix());
//r.sleep();
//ROS_INFO_STREAM("B: "<<this->B.transpose());
//ROS_INFO_STREAM("A: "<<this->A.transpose());
//r.sleep();
//ROS_INFO_STREAM("marker_size: "<<this->markerSize);
//r.sleep();
//ROS_INFO_STREAM("block_size: "<<this->thresParam1);
//ROS_INFO_STREAM("sub_constant: "<<this->thresParam2);
//r.sleep();
//ROS_INFO_STREAM("camera_info_url: "<<this->cameraParametersFile);
//ROS_INFO_STREAM("markerTrackerID: "<<this->markerTrackerID);
//r.sleep();
//ROS_INFO_STREAM("markerID: "<<this->markerID);
std::stringstream label;
label<<"SwitchFilter_"<<this->markerTrackerID;
this->switchFilterService=this->nh.advertiseService(label.str(),&MarkerDetector::MarkerTracker::SwitchFilterCallBack,this);
label.str("");
//this->cameraParameters.readFromXMLFile(this->cameraParametersFile);
this->sub = it->subscribe(this->imageTopicRaw, 1, &MarkerDetector::MarkerTracker::imageCallback,this);
//Publisher for the processed image
this->pub = it->advertise(this->imageTopicProcessed, 1);
// label<<"tfTarget_"<<this->cameraNameTag<<"_"<<this->markerTrackerID;
// this->pubTF = nh.advertise<geometry_msgs::TransformStamped>(label.str(),100);
// label.str("");
label<<"visualPoints_"<<this->cameraNameTag<<"_"<<this->markerTrackerID;
this->pubVisData=nh.advertise<aruco_marker_detector::MarkerDataArray>(label.str(),100);
label.str("");
this->Rz180<<-1,0,0,0,-1,0,0,0,1;
this->setOld=true;
this->filtered=true;
this->cameraConfigured=false;
}
MarkerTracker::~MarkerTracker()
{
delete it;
}
//bool function switch on/off the filter
bool MarkerTracker::SwitchFilterCallBack(aruco_marker_detector::switch_filter::Request &req,aruco_marker_detector::switch_filter::Response &res)
{
this->filtered=!this->filtered;//req.filtered;
res.confirm=this->filtered;
if(this->filtered)
ROS_INFO_STREAM("Marker Tracker ("<<this->markerTrackerID<<") Filter Switched ON ("<<this->filtered<<")");
else
ROS_INFO_STREAM("Marker Tracker ("<<this->markerTrackerID<<") Filter Switched OFF ("<<this->filtered<<")");
return true;
}
//This function is called everytime a new image is published
void MarkerTracker::imageCallback(const sensor_msgs::ImageConstPtr& original_image)
{
//Convert from the ROS image message to a CvImage suitable for working with OpenCV for processing
cv_bridge::CvImagePtr cv_ptr;
static tf::TransformBroadcaster br1;
tf::Transform transform;
double markerPosition[3];
double markerOrientationQ[4];
Matrix3d R,Rfixed;
//Affine3d TC_torso;
Quaterniond q_eigen;
tf::Quaternion q_tf;
//
try
{
//Always copy, returning a mutable CvImage
//OpenCV expects color images to use BGR channel order.
cv_ptr = cv_bridge::toCvCopy(original_image, sensor_msgs::image_encodings::BGR8);
}
catch (cv_bridge::Exception& e)
{
//if there is an error during conversion, display it
ROS_ERROR_STREAM(__FILE__<<"::cv_bridge exception("<<__LINE__<<": "<<e.what());
return;
}
//Get intrinsic parameters of the camera and size from image
if(!this->cameraConfigured)
{
this->cameraParameters.readFromXMLFile(this->cameraParametersFile);
this->cameraParameters.resize(cv_ptr->image.size());
this->cameraConfigured=true;
}
this->MDetector.pyrDown(0);
this->MDetector.setThresholdParams(this->thresParam1,this->thresParam2);
this->MDetector.setCornerRefinementMethod(aruco::MarkerDetector::SUBPIX);
//Detect Markers
this->MDetector.detect(cv_ptr->image,this->Markers,this->cameraParameters,this->markerSize);
std::stringstream s;
//Camera Frame
// Rz180<<-1,0,0,0,-1,0,0,0,1;
//This is the transformation from camera to world and it must be obtained from camera calib
//TC_torso.matrix()<<0,0,1,-1.1,-1,0,0,0.1,0,-1,0,0.0;
tf::transformEigenToTF(TC_torso,transform);
if(this->broadcastTF)
{
br1.sendTransform(tf::StampedTransform(transform, ros::Time::now(), cameraReferenceFrame, this->cameraNameTag));
}
tf::StampedTransform sTransform;
geometry_msgs::Transform msgTransform;
aruco_marker_detector::MarkerDataArray msgVisPointsArray;
aruco_marker_detector::MarkerData aux;
aruco::Marker tmp;
bool publishTF=false;
bool idNotDefined=true;
//for each marker, draw info and its boundaries in the image
for (unsigned int i=0;i<this->Markers.size();i++)
{
idNotDefined=true;
this->Markers[i].draw(cv_ptr->image,cv::Scalar(0,0,255),2);
this->Markers[i].OgreGetPoseParameters(markerPosition,markerOrientationQ);
R = Eigen::Quaterniond(markerOrientationQ[0], markerOrientationQ[1], markerOrientationQ[2], markerOrientationQ[3]);
Rfixed=this->Rz180*R;
q_eigen=Rfixed;
tf::quaternionEigenToTF(q_eigen,q_tf);
transform.setOrigin( tf::Vector3(-markerPosition[0], - markerPosition[1],markerPosition[2]) );
transform.setRotation(q_tf);
for(unsigned int j=0;j<this->markerIDs.size();j++)
{
if(Markers[i].id==this->markerIDs[j])
{
s<<this->markerLables[j]<<"_"<<this->cameraNameTag;
idNotDefined=false;
break;
}
}
//This is what it does if it recognizes a marker
//Marker with id 455 represents the target and we need to filter its pose
//If you need to filter any marker then remove the if statement and set publishTF=true
if(Markers[i].id<=40 && Markers[i].id>=20)
{
int z=Markers[i].id;
switch (z){
case 20:
{
publishTF=true;
s<<"Electronics:Phone";
break;
}
case 30:
{
publishTF=true;
s<<"Electronics:Pc";
break;
}
case 40:
{
publishTF=true;
s<<"Electronics:Printer";
break;
}
default:
{
publishTF=true;
s<<"Electronics:Undefined_Object";
}
}
}
else if(Markers[i].id<=935 && Markers[i].id>=915)
{
int z=Markers[i].id;
switch (z){
case 915:
{
publishTF=true;
s<<"Kitchen_utensil:Fork";
break;
}
case 925:
{
publishTF=true;
s<<"Kitchen_utensil:Spoon";
break;
}
case 935:
{
publishTF=true;
s<<"Kitchen_utensil:Knife";
break;
}
default:
{
publishTF=true;
s<<"Kitchen_utensil:Undefined_Object";
}
}
}
else if(Markers[i].id<=220 && Markers[i].id>=200)
{
int z=Markers[i].id;
switch (z){
case 200:
{
publishTF=true;
s<<"Container:Pot";
break;
}
case 210:
{
publishTF=true;
s<<"Container:Basket";
break;
}
case 220:
{
publishTF=true;
s<<"Container:Box";
break;
}
default:
{
publishTF=true;
s<<"Container:Undefined_Object";
}
}
}
else
{
s<<"Unknown_Object";
}
if(publishTF)
{
//Filter Signal
if(filtered)
{ // If the signal is not filtered, filter it and then save the values of position and orientation
tf::Vector3 X=transform.getOrigin();
tf::Quaternion Q=transform.getRotation();
//Orientation
this->qx(0)=Q.getX();
this->qy(0)=Q.getY();
this->qz(0)=Q.getZ();
this->qw(0)=Q.getW();
//Position
this->x(0)=X.getX();
this->y(0)=X.getY();
this->z(0)=X.getZ();
if(setOld)
{
//copy the first transformation to old and vold in both real and filtered
for(unsigned int i=1;i<3;i++)
{
this->qx(i)=qx(0);
this->qy(i)=qy(0);
this->qz(i)=qz(0);
this->qw(i)=qw(0);
this->qxf(i)=qx(0);
this->qyf(i)=qy(0);
this->qzf(i)=qz(0);
this->qwf(i)=qw(0);
this->x(i)=x(0);
this->y(i)=y(0);
this->z(i)=z(0);
this->xf(i)=x(0);
this->yf(i)=y(0);
this->zf(i)=z(0);
}
setOld=false;
}
MarkerDetector::FilterButter(this->qx,this->qxf,this->B,this->A);
MarkerDetector::FilterButter(this->qy,this->qyf,this->B,this->A);
MarkerDetector::FilterButter(this->qz,this->qzf,this->B,this->A);
MarkerDetector::FilterButter(this->qw,this->qwf,this->B,this->A);
MarkerDetector::FilterButter(this->x,this->xf,this->B,this->A);
MarkerDetector::FilterButter(this->y,this->yf,this->B,this->A);
MarkerDetector::FilterButter(this->z,this->zf,this->B,this->A);
transform.setRotation(tf::Quaternion(this->qxf(0),this->qyf(0),this->qzf(0),this->qwf(0)));
transform.setOrigin(tf::Vector3(this->xf(0),this->yf(0),this->zf(0)));
}
sTransform=tf::StampedTransform(transform, ros::Time::now(), this->cameraNameTag, s.str());
if(this->broadcastTF)
{
br1.sendTransform(sTransform);
}
publishTF=false;
}
else
{
sTransform=tf::StampedTransform(transform, ros::Time::now(), this->cameraNameTag, s.str());
if(this->broadcastTF)
{
br1.sendTransform(sTransform);
}
}
//Clear the labels
s.str("");
if (cameraParameters.isValid())
{
// aruco::CvDrawingUtils::draw3dCube(cv_ptr->image,Markers1[i],cameraParameters1);
aruco::CvDrawingUtils::draw3dAxis(cv_ptr->image,Markers[i],cameraParameters);
}
aux.markerID=Markers[i].id;
cv::Point2f cent=Markers[i].getCenter();
for(unsigned int ind=0;ind<4;ind++)
{
aux.points[ind].x=Markers[i][ind].x;
aux.points[ind].y=Markers[i][ind].y;
//Force the visual points to be homogeneous --this is used with the homography transformation --
aux.points[ind].z=1.0;
}
//Plot Marker Center
aux.points[4].x=cent.x;
aux.points[4].y=cent.y;
//Force the visual points to be homogeneous --this is used with the homography transformation --
aux.points[4].z=1.0;
cv::circle(cv_ptr->image,cv::Point2f(aux.points[4].x,aux.points[4].y),1,cv::Scalar(0,255,255),6);
//Copy current transform
tf::transformTFToMsg(transform,msgTransform);
aux.tfMarker=msgTransform;
msgVisPointsArray.header.stamp = ros::Time::now();
msgVisPointsArray.header.frame_id = this->cameraNameTag;
msgVisPointsArray.mTrackerID = this->markerTrackerID;
msgVisPointsArray.markerData.push_back(aux);
//Print the visual position of the marker's center
s<<"("<<msgVisPointsArray.markerData[i].points[4].x<<","<<msgVisPointsArray.markerData[i].points[4].y<<")";
cv::putText(cv_ptr->image,s.str().c_str(),cent,cv::FONT_HERSHEY_COMPLEX,1,cv::Scalar(255,0,0),3);
s.str("");
}
/**
* The publish() function is how you send messages. The parameter
* is the message object. The type of this object must agree with the type
* given as a template parameter to the advertise<>() call, as was done
* in the constructor in main().
*/
//Convert the CvImage to a ROS image message and publish it on the "camera/image_processed" topic.
pub.publish(cv_ptr->toImageMsg());
pubVisData.publish(msgVisPointsArray);
msgVisPointsArray.markerData.clear();
}
This program recognizes a marker with a specific id! I want to use the second program with this specific marker as input.
What is the best way to do this?
The code below will run a separate process, specified by command, and pipe its standard output into the string M.
FILE* p = popen("command", "r");
if (!p)
return 1;
char buf[100];
std::string M;
while (fgets(buf, sizeof(buf), p) != NULL) {
    M += buf;
}
pclose(p);
If you know that command will print whatever you need on its standard output, this should do what you want. Required includes:
#include <string>
#include <iostream>
#include <stdio.h>
EDIT:
After you posted the code of the other process, it is clear to me that you are approaching the problem the wrong way. You are using ROS, which is basically middleware facilitating interprocess communication in robotic applications. ROS itself provides the tools for exchanging strings between processes, and you should use ROS to perform that exchange. You use topics to exchange data; in your case, one process should subscribe to a topic while the other publishes to it. The receiving process will receive a callback whenever a string is published and will have access to the data. Check out http://wiki.ros.org/Topics for more info about topics in ROS. A sketch of the receiving node follows.
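A minimal sketch of the receiving node (the topic name "class_of_interest" is made up here; the detector node would publish a std_msgs/String on the same topic, e.g. via nh.advertise<std_msgs::String>("class_of_interest", 10) and labelPub.publish(msg)):
#include <ros/ros.h>
#include <std_msgs/String.h>
#include <string>
std::string M;
// Called whenever the detector node publishes a new label.
void labelCallback(const std_msgs::String::ConstPtr& msg)
{
    M = msg->data; // M now comes from the other process
    ROS_INFO_STREAM("Received class of interest: " << M);
}
int main(int argc, char* argv[])
{
    ros::init(argc, argv, "Info");
    ros::NodeHandle nh;
    ros::Subscriber sub = nh.subscribe("class_of_interest", 10, labelCallback);
    ros::spin(); // process incoming messages until shutdown
    return 0;
}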
If you are on a unix/linux system, you can pass output of one program to another with a pipe. For example
ls | wc -l
ls prints the names of all the files in a directory and wc -l takes that output and counts the number of lines.
To accept a pipe, your receiving program needs to read from stdin. For example
std::string s;
while (std::cin >> s) {
// do something with the string
}
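For completeness, here is a minimal sketch of the sending side; any program that writes the string to its standard output will do (the program names in the pipe below are hypothetical):
#include <iostream>
#include <string>
int main()
{
    std::string label = "knife"; // whatever the detector decided
    std::cout << label << std::endl; // goes into the pipe
    return 0;
}
Then run the two programs together as ./detector | ./receiver.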
Segmentation fault when retrieving OGRE 3D 1.81 submesh data.
I tried to make use of the code from http://alumni.cs.ucr.edu/~sorianom/cs134_09win/
to get Ogre mesh data for the Bullet physics engine to use, but I encountered a segmentation fault.
After reducing the code to find the reason, I found that it is "submesh->indexData->indexStart" that causes the segmentation fault.
I am running Ubuntu Linux 12.10 (32-bit x86) (due to the budget limit, we cannot afford to buy Windows) and use g++ as the compiler.
Please help me; I am an elementary school teacher who is trying to make some CAI programs for students to learn Math, English...
I know the cs134 sample code listed on the site is a little bit too old; I've upgraded all the meshes, the dotsceneloader... but I still cannot figure out what has to change.
Thank you!
The following is the reduced code that still makes the segment fault error message.
#include <Ogre.h>
#include <ExampleApplication.h>
#include <btBulletCollisionCommon.h>
class SampleApp : public ExampleApplication
{
public:
// Basic constructor
SampleApp()
{}
protected:
// Just override the mandatory create scene method
void createScene(void) {
mSceneMgr->setSkyBox(true, "Examples/CloudyNoonSkyBox");
Ogre::Light* myLight = mSceneMgr->createLight("Light0");
myLight->setType(Light::LT_POINT);
myLight->setPosition(0, 40, 0);
myLight->setDiffuseColour(1, 1, 1);
myLight->setSpecularColour(1, 1, 1);
Ogre::Entity* entity = mSceneMgr->createEntity("ogre", "ogrehead.mesh");
mSceneMgr->getRootSceneNode()->createChildSceneNode()->attachObject(entity);
Ogre::MeshPtr mesh = entity->getMesh();
Ogre::Mesh::SubMeshIterator j = mesh->getSubMeshIterator();
while (j.hasMoreElements()) {
Ogre::SubMesh* submesh = j.getNext();
std::cout << " submesh:" << submesh << "\n";
std::cout << " submesh->indexData:" << submesh->indexData << "\n";
//std::cout << " submesh->indexData->indexStart:" << submesh->indexData->indexStart << "\n";
int idxStart = submesh->indexData->indexStart;
int nIdx = submesh->indexData->indexCount;
}
}
};
// ----------------------------------------------------------------------------
// Main function, just boots the application object
// ----------------------------------------------------------------------------
#if OGRE_PLATFORM == OGRE_PLATFORM_WIN32
#define WIN32_LEAN_AND_MEAN
#include "windows.h"
INT WINAPI WinMain( HINSTANCE hInst, HINSTANCE, LPSTR strCmdLine, INT )
#else
int main(int argc, char **argv)
#endif
{
// Create application object
SampleApp app;
try
{
app.go();
}
catch( Exception& e )
{
#if OGRE_PLATFORM == OGRE_PLATFORM_WIN32
MessageBox( NULL, e.getFullDescription().c_str(), "An exception has occured!", MB_OK | MB_ICONERROR | MB_TASKMODAL);
#else
std::cerr << "An exception has occured: " << e.getFullDescription();
#endif
}
return 0;
}
And the error message:
....
submesh:0xb752c080
submesh->indexData:0x8d042444
Segmentation fault (core dumped)
------------------
(program exited with code: 139)
The following is the gdb output:
(gdb) print submesh->indexData
$5 = (Ogre::IndexData *) 0x8d042444
(gdb) print submesh->indexData->indexStart
Cannot access memory at address 0x8d042458
(gdb)
PS: compiler setting (you can compile the above code, which is modified from the Code::Blocks OGRE3D template; the Bullet part isn't necessary in this reduced version):
g++ -o "%e" "%f" `pkg-config --cflags --libs OGRE OIS bullet` -I/usr/local/include/OGRE -I/usr/include/bullet
The original file is as follows (from http://tutorialsdl.googlecode.com/svn/trunk/Lab3/Lab3/OgreToBtMeshConverter.cpp):
#include <Ogre.h>
#include <ExampleApplication.h>
#include <btBulletCollisionCommon.h>
class SampleApp : public ExampleApplication
{
public:
// Basic constructor
SampleApp()
{}
protected:
// Just override the mandatory create scene method
void createScene(void) {
mSceneMgr->setSkyBox(true, "Examples/CloudyNoonSkyBox");
Ogre::Light* myLight = mSceneMgr->createLight("Light0");
myLight->setType(Light::LT_POINT);
myLight->setPosition(0, 40, 0);
myLight->setDiffuseColour(1, 1, 1);
myLight->setSpecularColour(1, 1, 1);
Ogre::Entity* entity = mSceneMgr->createEntity("ogre", "ninja.mesh");
mSceneMgr->getRootSceneNode()->createChildSceneNode()->attachObject(entity);
//
// initialize bullets collision detection system
//
btVector3 worldAabbMin(-1000,-1000,-1000);
btVector3 worldAabbMax(1000,1000,1000);
btDefaultCollisionConfiguration* mColConfig;
btCollisionWorld* mColWorld;
btCollisionDispatcher* mDispatcher;
btAxisSweep3* mBroadphase;
mBroadphase = new btAxisSweep3(worldAabbMin, worldAabbMax); // broadphase
mColConfig = new btDefaultCollisionConfiguration();
mDispatcher = new btCollisionDispatcher(mColConfig); // narrowphase pair-wise checking
mColWorld = new btCollisionWorld(mDispatcher, mBroadphase, mColConfig);
//
// Registers every mesh in OGREs scene manager with the
// collision world. You should be adding your environment
// to the scene manager before this call. Your moveable
// character should be created after this call as to avoid
// having his mesh become part of the static level geometry.
//
// All meshes here are placed into group 2 and set to
// collide with group 1. You character should be in group
// 1, and set to collide with group 2. More about contact groups
// in the manual.
btCollisionWorld* colWorld=0;
registerEntityAsCollider(entity,colWorld);
}
void registerEntityAsCollider(Entity* entity, btCollisionWorld* colWorld)
{
// if you wish to do instancing you will have to share one
// btTriangleMesh amongst multiple btBvhTriangleMeshShape
// instances
btTriangleMesh* btMesh = new btTriangleMesh();
MeshPtr mesh = entity->getMesh();
Mesh::SubMeshIterator j = mesh->getSubMeshIterator();
while (j.hasMoreElements()) {
SubMesh* submesh = j.getNext();
int idxStart = submesh->indexData->indexStart;
int nIdx = submesh->indexData->indexCount;
HardwareIndexBuffer* idxBuffer
= submesh->indexData->indexBuffer.get();
HardwareVertexBufferSharedPtr virtBuffer;
VertexDeclaration* virtDecl;
if (submesh->useSharedVertices) {
virtDecl = mesh->sharedVertexData->vertexDeclaration;
assert(mesh->sharedVertexData->vertexBufferBinding->getBufferCount() > 0);
virtBuffer = mesh->sharedVertexData->vertexBufferBinding->getBuffer(0);
} else {
virtDecl = submesh->vertexData->vertexDeclaration;
assert(submesh->vertexData->vertexBufferBinding->getBufferCount() > 0);
virtBuffer = submesh->vertexData->vertexBufferBinding->getBuffer(0);
}
unsigned char* pVert = static_cast<unsigned char*>(virtBuffer->lock(HardwareBuffer::HBL_READ_ONLY));
// need to lock the buffer since vertex data resides on GPU
// and we need synchronization
unsigned short* sindices = NULL;
unsigned long* lindices = NULL;
if (idxBuffer->getType() == HardwareIndexBuffer::IT_16BIT) {
sindices = static_cast<unsigned short*>(idxBuffer->lock(HardwareBuffer::HBL_READ_ONLY));
} else if (idxBuffer->getType() == HardwareIndexBuffer::IT_32BIT) {
lindices = static_cast<unsigned long*>(idxBuffer->lock(HardwareBuffer::HBL_READ_ONLY));
} else {
assert(true == false);
}
const VertexElement* elm = virtDecl->findElementBySemantic(VES_POSITION, 0);
int offset = elm->getOffset();
assert(elm->getType() == VET_FLOAT3);
for (int k = idxStart; k < idxStart + nIdx; k += 3) {
unsigned int indices[3];
btVector3 vertices[3];
if (idxBuffer->getType() == HardwareIndexBuffer::IT_16BIT) {
for (int l = 0; l < 3; ++l) {
indices[l] = sindices[k + l];
}
} else {
for (int l = 0; l < 3; ++l) {
indices[l] = lindices[k + l];
}
}
for (int l = 0; l < 3; ++l) { // for each vertex
Real* posVert = (Real*)(pVert + indices[l] * virtBuffer->getVertexSize() + offset);
for (int m = 0; m < 3; ++m) { // for each vertex component
vertices[l][m] = posVert[m];
}
}
btMesh->addTriangle(vertices[0], vertices[1], vertices[2]);
}
idxBuffer->unlock();
virtBuffer->unlock();
}
btBvhTriangleMeshShape* btMeshShape = new btBvhTriangleMeshShape(btMesh, true, true);
btCollisionObject* btObj = new btCollisionObject();
btObj->setCollisionShape(btMeshShape);
Vector3 pos = entity->getParentSceneNode()->getPosition();
Quaternion orient = entity->getParentSceneNode()->getOrientation();
Vector3 scale = entity->getParentSceneNode()->getScale();
btMeshShape->setLocalScaling(btVector3(scale[0], scale[1], scale[2]));
btMeshShape->setMargin(0.0);
btTransform btTrans;
btTrans.setIdentity();
btTrans.setOrigin(btVector3(pos[0], pos[1], pos[2]));
btTrans.setRotation(btQuaternion(orient[1], orient[2], orient[3], orient[0]));
btObj->setWorldTransform(btTrans);
// the last 2 parameters are bit strings representing group membership
// and the groups which it is allowed to collide with
colWorld->addCollisionObject(btObj, 2, 1);
}
//
// This method will take every entity in your scene and register it as
// a mesh in the btCollisionWorld. NOTE: Be sure to call this function after
// you've added your static entities (environment) to the scene manager but
// before you add your characters.
//
void registerAllEntitiesAsColliders(SceneManager* sceneMgr, btCollisionWorld* colWorld)
{
SceneManager::MovableObjectIterator
i = sceneMgr->getMovableObjectIterator("Entity");
while (i.hasMoreElements()) {
Entity* entity = static_cast<Entity*>(i.getNext());
registerEntityAsCollider(entity, colWorld);
}
}
};
// ----------------------------------------------------------------------------
// Main function, just boots the application object
// ----------------------------------------------------------------------------
#if OGRE_PLATFORM == OGRE_PLATFORM_WIN32
#define WIN32_LEAN_AND_MEAN
#include "windows.h"
INT WINAPI WinMain( HINSTANCE hInst, HINSTANCE, LPSTR strCmdLine, INT )
#else
int main(int argc, char **argv)
#endif
{
// Create application object
SampleApp app;
try
{
app.go();
}
catch( Exception& e )
{
#if OGRE_PLATFORM == OGRE_PLATFORM_WIN32
MessageBox( NULL, e.getFullDescription().c_str(), "An exception has occured!", MB_OK | MB_ICONERROR | MB_TASKMODAL);
#else
std::cerr << "An exception has occured: " << e.getFullDescription();
#endif
}
return 0;
}