Related
I want to read asynchronously from a child process's stdout (and write to its stdin), so I created an IOCP loop that is notified when ReadFile() completes. But the read never completes: ReadFile() always fails with ERROR_IO_PENDING, and so does WriteFile().
Node.js can asynchronously read from a child process:
const child_process = require("child_process")
var p = child_process.spawn("gcc")
p.stdout.on('data', data=>console.log(String(data)))
p.stderr.on('data', data=>console.log(String(data)))
How to do this in the Win32 API?
#include <windows.h>
#include <stdio.h>
#include <assert.h>
#include <wchar.h>
#pragma comment (lib, "User32.lib")
static HANDLE iocp = INVALID_HANDLE_VALUE;
/* Bookkeeping for one spawned child: overlapped state, pipe handles and a read buffer. */
typedef struct PROCESS {
    OVERLAPPED ol;          /* passed to the overlapped ReadFile/WriteFile calls */
    HANDLE hProcess;        /* NOTE(review): not referenced by the visible code */
    HANDLE stdin_read;      /* handed to the child as hStdInput */
    HANDLE stdin_write;     /* written by wmain with console input */
    HANDLE stdout_read;     /* read by wmain and by Worker */
    HANDLE stdout_write;    /* handed to the child as hStdOutput */
    HANDLE stderr_read;
    HANDLE stderr_write;    /* handed to the child as hStdError */
    char buf[100];          /* destination of the stdout reads */
} PROCESS, *PPROCESS, *LPROCESS;
DWORD WINAPI Worker(LPVOID param);
/*
 * Creates a uniquely named byte-mode pipe pair and associates both ends with
 * the global completion port `iocp`, using `p` as the completion key.
 *
 * pserver_pipe  receives the CreateNamedPipeW end (outbound, overlapped).
 * pclient_pipe  receives the CreateFileW end (GENERIC_READ, overlapped,
 *               opened with bInheritHandle set).
 * p             context pointer; part of the pipe name and the completion key.
 *
 * Returns TRUE on success. On failure returns FALSE, closes every handle it
 * opened, leaves the out parameters untouched and preserves GetLastError().
 */
BOOL create_pipe(HANDLE* pserver_pipe, HANDLE* pclient_pipe, PROCESS* p) {
    static __int64 counter = 0;
    HANDLE server_pipe, client_pipe;
    DWORD err;
    WCHAR name[64];
    SECURITY_ATTRIBUTES sa;

    for (;;) {
        /* swprintf takes the capacity in wide characters, not in bytes. */
        swprintf(name, sizeof(name) / sizeof(name[0]),
                 L"\\\\?\\pipe\\child\\%I64d.%p", counter, (void*)p);
        server_pipe = CreateNamedPipeW(
            name,
            PIPE_ACCESS_OUTBOUND | FILE_FLAG_OVERLAPPED | FILE_FLAG_FIRST_PIPE_INSTANCE | WRITE_DAC,
            PIPE_TYPE_BYTE | PIPE_READMODE_BYTE | PIPE_WAIT,
            1,
            65536,
            65536,
            0,
            NULL);
        if (server_pipe != INVALID_HANDLE_VALUE)
            break;
        err = GetLastError();
        /* Only a name collision is worth retrying with the next counter value. */
        if (err != ERROR_PIPE_BUSY && err != ERROR_ACCESS_DENIED) {
            return FALSE;
        }
        counter++;
    }

    sa.nLength = sizeof sa;
    sa.lpSecurityDescriptor = NULL;
    sa.bInheritHandle = 1;
    client_pipe = CreateFileW(name,
        GENERIC_READ | WRITE_DAC,
        0,
        &sa,
        OPEN_EXISTING,
        FILE_FLAG_OVERLAPPED,
        NULL);
    if (client_pipe == INVALID_HANDLE_VALUE) {
        err = GetLastError();
        CloseHandle(server_pipe);
        SetLastError(err);   /* CloseHandle may have overwritten the real cause */
        return FALSE;
    }

    if (CreateIoCompletionPort(client_pipe, iocp, (ULONG_PTR)p, 0) == NULL ||
        CreateIoCompletionPort(server_pipe, iocp, (ULONG_PTR)p, 0) == NULL) {
        err = GetLastError();
        CloseHandle(client_pipe);
        CloseHandle(server_pipe);
        SetLastError(err);
        return FALSE;
    }

    *pclient_pipe = client_pipe;
    *pserver_pipe = server_pipe;
    return TRUE;
}
int wmain()
{
iocp = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
assert(iocp);
PROCESS child_process{};
assert(create_pipe(&child_process.stdout_write, &child_process.stdout_read, &child_process));
assert(create_pipe(&child_process.stderr_write, &child_process.stderr_read, &child_process));
assert(create_pipe(&child_process.stdin_write, &child_process.stdin_read, &child_process));
HANDLE hThread = CreateThread(NULL, 0, Worker, iocp, 0, NULL);
assert(hThread);
WCHAR szCmdline[]=L"cmd";
PROCESS_INFORMATION piProcInfo{};
STARTUPINFOW siStartInfo{.cb = sizeof(STARTUPINFO),
.dwFlags=STARTF_USESTDHANDLES,
.hStdInput=child_process.stdin_read,
.hStdOutput=child_process.stdout_write,
.hStdError=child_process.stderr_write};
assert(CreateProcessW(NULL, szCmdline, NULL, NULL, TRUE, 0, NULL, NULL, &siStartInfo, &piProcInfo));
CloseHandle(piProcInfo.hProcess);
CloseHandle(piProcInfo.hThread);
// CloseHandle(child_process.stdout_write);
// CloseHandle(child_process.stdin_read);
ReadFile(child_process.stdout_read, child_process.buf, sizeof(child_process.buf), NULL, &child_process.ol);
int err = GetLastError();
if (err!=ERROR_IO_PENDING){
printf("Error in ReadFile %d\n", err);
}else{
puts("ReadFile is pending...\n");
}
char buf[100];
DWORD dwIn;
for(;;){
ReadConsoleA(GetStdHandle(STD_INPUT_HANDLE), buf, sizeof buf, &dwIn, NULL);
if (dwIn<=0)
break;
WriteFile(child_process.stdin_write, buf, dwIn, NULL, &child_process.ol);
err = GetLastError();
if (err!=ERROR_IO_PENDING){
printf("Error in WriteFile %d\n", err);
}else{
puts("WriteFile is pending...\n");
}
}
PostQueuedCompletionStatus(iocp, 0, 0, 0);
WaitForSingleObject(hThread, INFINITE);
CloseHandle(hThread);
CloseHandle(iocp);
}
/*
 * Completion-port worker. `param` is the completion port handle.
 *
 * Each completed stdout read is echoed to the console and the read is re-armed.
 * The loop ends when a packet with a NULL completion key arrives, when a
 * dequeue fails or reports zero bytes, or when the read cannot be re-armed.
 * Always returns 0.
 */
DWORD WINAPI Worker(LPVOID param) {
    for (;;) {
        /* Initialised because a failed dequeue may leave the outputs unwritten. */
        DWORD dwIoSize = 0;
        LPOVERLAPPED ol = NULL;
        PROCESS* ctx = NULL;

        BOOL bSuccess = GetQueuedCompletionStatus((HANDLE)param, &dwIoSize,
                                                  (PULONG_PTR)&ctx,
                                                  &ol,
                                                  INFINITE);
        /* Read the error once, and only when the call actually failed. */
        DWORD err = bSuccess ? ERROR_SUCCESS : GetLastError();

        if (ctx == NULL) {
            printf("ctx is NULL, maybe you call PostQueuedCompletionStatus? err=%lu\n", err);
            break;
        }
        /* A completion carrying any other OVERLAPPED is not the stdout read. */
        if (ol != &ctx->ol) {
            continue;
        }
        if (bSuccess == FALSE || dwIoSize == 0) {
            printf("GetQueuedCompletionStatus does not success(maybe EOF reached?) err=%lu\n", err);
            break;
        }

        WriteConsoleA(GetStdHandle(STD_OUTPUT_HANDLE), ctx->buf, dwIoSize, NULL, NULL);

        /* If the read cannot even be queued, no completion will ever arrive for it. */
        if (!ReadFile(ctx->stdout_read, ctx->buf, sizeof(ctx->buf), NULL, &ctx->ol)) {
            err = GetLastError();
            if (err != ERROR_IO_PENDING) {
                printf("ReadFile failed, err=%lu\n", err);
                break;
            }
        }
    }
    return 0;
}
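After searching on GitHub, the solution turned out to be quite easy.
Create a named pipe (CreateNamedPipe) for the end used by the current process, and open the same pipe with CreateFile, using an inheritable handle, for the end used by the child process. For stdout/stderr the parent reads and the child writes; for stdin it is the other way round.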
Github links:
https://github.com/parasol-framework/parasol/blob/c5ad64188c7496e3ff54eb75fd03c9e3124fe08b/src/core/microsoft/processes.c
https://github.com/dinamsky/malware-botnets/blob/4cd142d10f971cb93c334b6f48c12c85bcc8f63a/Phatbot-stoney/Phatbot-stoney/cmdshell.cpp
https://github.com/grimjoey/jlstudio/blob/e5d2d81f0a94d2020f2e912e43a487d9cb6f7c33/src/jls/process.cpp
https://github.com/Ai-Thinker-Open/Ai-Thinker-Open_ESP32-S2_SDK/blob/7d75213674b4572f90c68162ad6fe9b16dae65ad/tools/windows/tool_setup/cmdlinerunner/cmdlinerunner.c
https://github.com/LeonColt/skripsi/blob/141af593ec65cd7adaedf7a90fc4cd7cde5cc602/Maschinen/RedirectIOChildProcess.cpp
redirect stdout and stderr
#include <windows.h>
#include <assert.h>
#include <stdio.h>
struct Stdio;
typedef void (*Callback)(struct Stdio* self, DWORD len);
/* State for one redirected stream: overlapped block, pipe end, buffer, completion handler. */
struct Stdio {
    OVERLAPPED ol;        /* handed to ReadFile for this stream */
    HANDLE     pipe;      /* this process's end of the pipe */
    BYTE       buf[100];  /* destination of the reads */
    Callback   callback;  /* invoked by Worker with the transferred byte count */
};
/* One Stdio slot per standard stream of the child. */
typedef struct CTX {
    struct Stdio Stdout;
    struct Stdio Stderr;
    struct Stdio Stdin;   /* NOTE(review): not used by the visible code of this program */
} CTX, *LCTX, *LPCTX;
// Console output handles of this process: assigned in wmain, written to by
// OnStdoutRead / OnStderrRead.
HANDLE Hstdout=INVALID_HANDLE_VALUE;
HANDLE Hstderr=INVALID_HANDLE_VALUE;
/*
 * Completion-port worker. `iocp` is the completion port handle.
 *
 * Every non-empty completion is dispatched to the callback of the Stdio whose
 * address was registered as the completion key. The thread ends on the first
 * failed or empty completion, or on a posted packet with a NULL key/OVERLAPPED.
 * Always returns 0.
 */
DWORD WINAPI Worker(LPVOID iocp){
    for (;;) {
        /* Initialised because a failed dequeue may leave the outputs unwritten. */
        struct Stdio *stdio = NULL;
        OVERLAPPED *ol = NULL;
        DWORD dwIoSize = 0;

        BOOL ok = GetQueuedCompletionStatus((HANDLE)iocp, &dwIoSize, (PULONG_PTR)&stdio, &ol, INFINITE);
        if (ok && dwIoSize != 0 && ol != NULL && stdio != NULL) {
            stdio->callback(stdio, dwIoSize);
            continue;
        }

        /* GetLastError() is only meaningful when the dequeue itself failed. */
        DWORD err = ok ? ERROR_SUCCESS : GetLastError();
        if (err == ERROR_BROKEN_PIPE) {
            puts("the process has been exited, exit thread...");
        } else if (ok && ol == NULL) {
            /* Packet posted with PostQueuedCompletionStatus: an orderly shutdown. */
            puts("shutdown requested, exit thread");
        } else {
            printf("error = %lu, exit thread\n", err);
        }
        break;
    }
    return 0;
}
void OnStdoutRead(struct Stdio *self, DWORD len){
WriteConsoleA(Hstdout, self->buf, len, NULL, NULL);
ReadFile(self->pipe, self->buf, sizeof(self->buf), NULL, &self->ol);
}
void OnStderrRead(struct Stdio *self, DWORD len){
WriteConsoleA(Hstderr, self->buf, len, NULL, NULL);
ReadFile(self->pipe, self->buf, sizeof(self->buf), NULL, &self->ol);
}
int wmain(){
assert(Hstdout = GetStdHandle(STD_OUTPUT_HANDLE));
assert(Hstderr = GetStdHandle(STD_ERROR_HANDLE));
HANDLE Pstdout, Pstderr; // child process's
CTX ctx{.Stdout=Stdio{.callback=OnStdoutRead}, .Stderr=Stdio{.callback=OnStderrRead}};
HANDLE iocp = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
HANDLE hThread = CreateThread(NULL, 0, Worker, iocp, 0, NULL); // Worker thread
SECURITY_ATTRIBUTES sa{.nLength=sizeof(SECURITY_ATTRIBUTES), .bInheritHandle=TRUE};
const WCHAR* pipe_name1 = L"\\\\.\\Pipe\\child-1";
assert((ctx.Stdout.pipe = CreateNamedPipeW(
pipe_name1,
PIPE_ACCESS_INBOUND | FILE_FLAG_OVERLAPPED,
PIPE_TYPE_BYTE,
1,
4096,
4096,
5000,
NULL))!=INVALID_HANDLE_VALUE);
assert(INVALID_HANDLE_VALUE != (Pstdout = CreateFileW(
pipe_name1,
GENERIC_WRITE,
0,
&sa,
OPEN_EXISTING,
FILE_FLAG_OVERLAPPED,
NULL)));
const WCHAR *pipe_name2 = L"\\\\.\\Pipe\\child-2";
assert((ctx.Stderr.pipe = CreateNamedPipeW(
pipe_name2,
PIPE_ACCESS_INBOUND | FILE_FLAG_OVERLAPPED,
PIPE_TYPE_BYTE,
1,
4096,
4096,
5000,
NULL))!=INVALID_HANDLE_VALUE);
assert((Pstderr = CreateFileW(
pipe_name2,
GENERIC_WRITE,
0,
&sa,
OPEN_EXISTING,
FILE_FLAG_OVERLAPPED,
NULL))!=INVALID_HANDLE_VALUE);
STARTUPINFOW si{.cb = sizeof(si),
.dwFlags = STARTF_USESTDHANDLES | STARTF_USESHOWWINDOW,
.wShowWindow = SW_HIDE,
.hStdInput = GetStdHandle(STD_INPUT_HANDLE), // use current stdin
.hStdOutput = Pstdout,
.hStdError = Pstderr};
WCHAR cmd[] = L"powershell"; // or cmd, py, bash...
PROCESS_INFORMATION pInfo{};
assert(CreateProcessW(
NULL, cmd, NULL, NULL,
TRUE, 0, NULL, NULL,
&si, &pInfo));
assert(CloseHandle(Pstdout)); // we don't need this
assert(CloseHandle(Pstderr)); // we don't need this
assert(CreateIoCompletionPort(ctx.Stdout.pipe, iocp, (ULONG_PTR)&ctx.Stdout, 0));
assert(CreateIoCompletionPort(ctx.Stderr.pipe, iocp, (ULONG_PTR)&ctx.Stderr, 0));
ReadFile(ctx.Stdout.pipe, ctx.Stdout.buf, sizeof(ctx.Stdout.buf), NULL, &ctx.Stdout.ol);
ReadFile(ctx.Stderr.pipe, ctx.Stdout.buf, sizeof(ctx.Stderr.buf), NULL, &ctx.Stderr.ol);
WaitForSingleObject(pInfo.hProcess, INFINITE); // wait for process exit
PostQueuedCompletionStatus(iocp, 0, 0, NULL); // tell IOCP Worker exit
WaitForSingleObject(hThread, INFINITE); // wait for thread exit
assert(CloseHandle(hThread));
assert(CloseHandle(ctx.Stderr.pipe));
assert(CloseHandle(ctx.Stdout.pipe));
assert(CloseHandle(pInfo.hProcess));
assert(CloseHandle(iocp));
assert(CloseHandle(Hstderr));
puts("exit main function..."); // !!important: before close stdout
assert(CloseHandle(Hstdout));
}
Redirect stdin, stdout, stderr
#include <windows.h>
#include <assert.h>
#include <stdio.h>
// Console handles of this process (assigned in wmain) and the event that
// Worker signals to make wmain's console-forwarding loop stop.
static HANDLE Hstdout, Hstderr, Hstdin, HstopEvent;
struct Stdio;
typedef void (*Callback)(struct Stdio* self, DWORD len);
/* State for one redirected stream: overlapped block, pipe end, buffer, completion handler. */
struct Stdio {
    OVERLAPPED ol;        /* handed to ReadFile/WriteFile for this stream */
    HANDLE     pipe;      /* this process's end of the pipe */
    BYTE       buf[100];  /* destination of the reads */
    Callback   callback;  /* invoked by Worker with the transferred byte count */
};
/* One Stdio slot per standard stream of the child. */
typedef struct CTX {
    struct Stdio Stdout;
    struct Stdio Stderr;
    struct Stdio Stdin;
} CTX, *LCTX, *LPCTX;
/*
 * Completion-port worker. `iocp` is the completion port handle.
 *
 * Every non-empty completion is dispatched to the callback of the Stdio whose
 * address was registered as the completion key. On any failure (including the
 * child closing its pipe end) HstopEvent is signalled so the console loop in
 * wmain can stop. A posted packet with a NULL key/OVERLAPPED ends the thread
 * without signalling the event. Always returns 0.
 */
DWORD WINAPI Worker(LPVOID iocp){
    for (;;) {
        /* Initialised because a failed dequeue may leave the outputs unwritten. */
        struct Stdio *stdio = NULL;
        OVERLAPPED *ol = NULL;
        DWORD dwIoSize = 0;

        BOOL ok = GetQueuedCompletionStatus((HANDLE)iocp, &dwIoSize, (PULONG_PTR)&stdio, &ol, INFINITE);
        if (ok && dwIoSize != 0 && ol != NULL && stdio != NULL) {
            stdio->callback(stdio, dwIoSize);
            continue;
        }

        /* GetLastError() is only meaningful when the dequeue itself failed. */
        DWORD err = ok ? ERROR_SUCCESS : GetLastError();
        if (ok && ol == NULL) {
            /* Packet posted with PostQueuedCompletionStatus: an orderly shutdown. */
            puts("shutdown requested, exit thread");
        } else {
            SetEvent(HstopEvent);
            if (err == ERROR_BROKEN_PIPE) {
                puts("the process has been exited, exit thread...");
            } else {
                printf("error = %lu, exit thread\n", err);
            }
        }
        break;
    }
    return 0;
}
void OnStdoutRead(struct Stdio *self, DWORD len){
WriteConsoleA(Hstdout, self->buf, len, NULL, NULL);
ReadFile(self->pipe, self->buf, sizeof(self->buf), NULL, &self->ol);
}
void OnStderrRead(struct Stdio *self, DWORD len){
WriteConsoleA(Hstderr, self->buf, len, NULL, NULL);
ReadFile(self->pipe, self->buf, sizeof(self->buf), NULL, &self->ol);
}
/* Write-completion handler for the stdin pipe: reports how many bytes were written. */
void OnStdinWriteComplete(struct Stdio *self, DWORD len){
    (void)self;   /* the stream state is not needed for the report */
    const unsigned written = (unsigned)len;
    printf("[%u bytes write to stdin]\n", written);
}
int wmain(){
assert((Hstdout = GetStdHandle(STD_OUTPUT_HANDLE))!=INVALID_HANDLE_VALUE);
assert((Hstderr = GetStdHandle(STD_ERROR_HANDLE))!=INVALID_HANDLE_VALUE);
assert((Hstdin = GetStdHandle(STD_INPUT_HANDLE))!=INVALID_HANDLE_VALUE);
assert((HstopEvent=CreateEventW(NULL, FALSE, FALSE, NULL))!=INVALID_HANDLE_VALUE);
CTX ctx{.Stdout=Stdio{.callback=OnStdoutRead}, .Stderr=Stdio{.callback=OnStderrRead}, .Stdin=Stdio{.callback=OnStdinWriteComplete}};
STARTUPINFOW si{.cb = sizeof(si), .dwFlags = STARTF_USESTDHANDLES};
HANDLE iocp = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
HANDLE hThread = CreateThread(NULL, 0, Worker, iocp, 0, NULL); // Worker thread
SECURITY_ATTRIBUTES sa{.nLength=sizeof(SECURITY_ATTRIBUTES), .bInheritHandle=TRUE};
const WCHAR* pipe_name1 = L"\\\\.\\Pipe\\child-1";
assert((ctx.Stdout.pipe = CreateNamedPipeW(
pipe_name1,
PIPE_ACCESS_INBOUND | FILE_FLAG_OVERLAPPED,
PIPE_TYPE_BYTE,
1,
4096,
4096,
5000,
NULL))!=INVALID_HANDLE_VALUE);
assert((si.hStdOutput = CreateFileW(
pipe_name1,
GENERIC_WRITE,
0,
&sa,
OPEN_EXISTING,
FILE_FLAG_OVERLAPPED,
NULL))!=INVALID_HANDLE_VALUE);
const WCHAR *pipe_name2 = L"\\\\.\\Pipe\\child-2";
assert((ctx.Stderr.pipe = CreateNamedPipeW(
pipe_name2,
PIPE_ACCESS_INBOUND | FILE_FLAG_OVERLAPPED,
PIPE_TYPE_BYTE,
1,
4096,
4096,
5000,
NULL))!=INVALID_HANDLE_VALUE);
assert((si.hStdError = CreateFileW(
pipe_name2,
GENERIC_WRITE,
0,
&sa,
OPEN_EXISTING,
FILE_FLAG_OVERLAPPED,
NULL))!=INVALID_HANDLE_VALUE);
const WCHAR *pipe_name3 = L"\\\\.\\Pipe\\child-3";
assert((ctx.Stdin.pipe = CreateNamedPipeW(
pipe_name3,
PIPE_ACCESS_OUTBOUND,
PIPE_TYPE_BYTE,
1,
4096,
4096,
5000,
NULL))!=INVALID_HANDLE_VALUE);
assert((si.hStdInput = CreateFileW(
pipe_name3,
GENERIC_READ,
0,
&sa,
OPEN_EXISTING,
FILE_FLAG_OVERLAPPED,
NULL))!=INVALID_HANDLE_VALUE);
WCHAR cmd[] = L"powershell"; // or cmd, py, bash...
PROCESS_INFORMATION pInfo{};
assert(CreateProcessW(
NULL, cmd, NULL, NULL,
TRUE, 0, NULL, NULL,
&si, &pInfo));
assert(CloseHandle(si.hStdError)); // we don't need this
assert(CloseHandle(si.hStdInput)); // we don't need this
assert(CloseHandle(si.hStdOutput)); // we don't need this
assert(CreateIoCompletionPort(ctx.Stdout.pipe, iocp, (ULONG_PTR)&ctx.Stdout, 0));
assert(CreateIoCompletionPort(ctx.Stderr.pipe, iocp, (ULONG_PTR)&ctx.Stderr, 0));
ReadFile(ctx.Stdout.pipe, ctx.Stdout.buf, sizeof(ctx.Stdout.buf), NULL, &ctx.Stdout.ol);
ReadFile(ctx.Stderr.pipe, ctx.Stdout.buf, sizeof(ctx.Stderr.buf), NULL, &ctx.Stderr.ol);
DWORD dwIoSize;
for(;;){
CHAR buf[100];
ReadConsoleA(Hstdin, buf, sizeof(buf), &dwIoSize, NULL);
if (WaitForSingleObject(HstopEvent, 0)==WAIT_OBJECT_0)
break;
WriteFile(ctx.Stdin.pipe, buf, dwIoSize, NULL, &ctx.Stdin.ol);
if (WaitForSingleObject(HstopEvent, 0)==WAIT_OBJECT_0)
break;
}
PostQueuedCompletionStatus(iocp, 0, 0, NULL); // tell IOCP Worker exit
WaitForSingleObject(hThread, INFINITE); // wait for thread exit
assert(CloseHandle(hThread));
assert(CloseHandle(ctx.Stderr.pipe));
assert(CloseHandle(ctx.Stdout.pipe));
assert(CloseHandle(pInfo.hProcess));
assert(CloseHandle(iocp));
assert(CloseHandle(Hstderr));
assert(CloseHandle(HstopEvent));
puts("exit main function..."); // !!important: before close stdout
assert(CloseHandle(Hstdout));
}
I tried to make parallel bfs in openCL but I didn't have enough experience with c++.
So this is probably memory error, but I really don't know how to fix it.
I also can't find out what the error value -51 means.
As a result I got "Unhandled exception at 0x00007FFCFB06A549 (amdocl64.dll) in my project.exe: 0xC0000005: Access violation reading location 0xFFFFFFFFFFFFFFFF" in next line.
main
// Host-side setup for the parallel BFS kernel: builds the graph, creates the
// OpenCL context, queue and buffers, uploads the graph and the initial
// distances, builds bfs.cl and binds the kernel arguments.
Graph G(AdjacencyList, Directed);
int startVertex;
vector<int> distance;
vector<bool> visited;
distance = vector<int>(G.numVertices);
visited = vector<bool>(G.numVertices);
bool done = false;
const bool true_value = true;
int level = 0;
// Allocation on device
const int size = G.numVertices * sizeof(int);
const int adjacencySize = G.adjacencyList.size() * sizeof(int);
//OpenCL
cl_int status;
cl_int ret;
cl_platform_id platform_id;
clGetPlatformIDs(1, &platform_id, NULL);
cl_device_id device_id;
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &status);
cl_command_queue command_queue = clCreateCommandQueueWithProperties(context, device_id, NULL, &status);
cl_mem d_adjacencyList = clCreateBuffer(context, CL_MEM_READ_WRITE, adjacencySize, NULL, &status);
cl_mem d_edgesOffset = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);
cl_mem d_edgesSize = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);
cl_mem d_distance = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);
// NOTE(review): the kernel declares `__global bool *done`; the size of a host
// bool is not guaranteed to match the device's -- confirm or switch both sides to int.
cl_mem d_done = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(bool), NULL, &status);
status = clEnqueueWriteBuffer(command_queue, d_adjacencyList, CL_TRUE, 0, adjacencySize, &G.adjacencyList[0], 0, NULL, NULL);
status = clEnqueueWriteBuffer(command_queue, d_edgesOffset, CL_TRUE, 0, size, &G.edgesOffset[0], 0, NULL, NULL);
status = clEnqueueWriteBuffer(command_queue, d_edgesSize, CL_TRUE, 0, size, &G.edgesSize[0], 0, NULL, NULL);
distance = vector<int>(G.numVertices, INT_MAX);
distance[start] = 0;
status = clEnqueueWriteBuffer(command_queue, d_distance, CL_TRUE, 0, size, distance.data(), 0, NULL, NULL);
char* source_str = NULL;
size_t source_size;
FILE* fp;
fp = fopen("bfs.cl", "r");
if (!fp)
{
    cout << "Failed to load Kernel\n";
    exit(1);
}
source_str = (char*)malloc(MAX_SOURCE_SIZE);
if (!source_str)
{
    cout << "Failed to allocate kernel source buffer\n";
    fclose(fp);
    exit(1);
}
source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);
cl_program program = clCreateProgramWithSource(context, 1, (const char**)&source_str, (const size_t*)&source_size, &status);
status = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
cl_kernel kernel = clCreateKernel(program, "bfs", &status);
// Argument indices and sizes must follow the kernel signature exactly:
//   bfs(int n, adjacencyList, edgesOffset, edgesSize, distance, int level, done)
// A duplicated edgesOffset argument used to shift every later index by one, so
// a cl_mem-sized value landed on the `int level` slot and the call failed with
// CL_INVALID_ARG_SIZE (-51).
status = clSetKernelArg(kernel, 0, sizeof(int), (void*)&G.numVertices);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&d_adjacencyList);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&d_edgesOffset);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*)&d_edgesSize);
status = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*)&d_distance);
status = clSetKernelArg(kernel, 5, sizeof(int), (void*)&level);   // scalar argument, not a buffer
status = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*)&d_done);
kernel
/*
 * One BFS relaxation step, one work-item per vertex.
 * A vertex whose distance equals `level` gives every still-unreached neighbour
 * (distance == INT_MAX) the distance level + 1 and writes false to *done.
 */
__kernel void bfs(int n, __global int *adjacencyList,__global int *edgesOffset,__global int *edgesSize,__global int *distance, int level,__global bool *done) {
    const int vertex = get_global_id(0);

    /* Work-items outside the graph or off the current frontier have nothing to do. */
    if (vertex >= n)
        return;
    if (distance[vertex] != level)
        return;

    int edge = edgesOffset[vertex];
    while (edge < edgesOffset[vertex] + edgesSize[vertex]) {
        const int neighbour = adjacencyList[edge];
        if (distance[neighbour] == INT_MAX) {
            *done = false;
            distance[neighbour] = level + 1;
        }
        ++edge;
    }
}
Hi #Parrison welcome to StackOverflow!
All the OpenCL error codes are defined in cl.h. In the latest (version 3) cl.h you will find the error codes defined between lines 194 and 270, where on line 241 you will find:
#define CL_INVALID_ARG_SIZE -51
So the OpenCL ICD reckons that you have passed the wrong variable size for distance.
However, I can see many other errors before this one. For example, you need to set the size of the OpenCL buffers based on the sizes of OpenCL variable not native variables, e.g.:
cl_int instead of int
cl_float instead of float
and especially cl_bool instead of bool.
There is no guarantee that an OpenCL cl_int is the same size as a host int, and an OpenCL cl_bool is defined as an unsigned int, which is highly unlikely to be the same size as a bool!
Ensure that all the parameters to your OpenCL kernel are defined correctly and that
you are creating the correct buffers and variables for them in the main program.
I want to fill an array of glm::vec3 with an OpenCL kernel.
All I want to do is fill the array with [1.0, 2.0, 3.0].
So upon success I should get the triplet repeated 256 times.
[1.0, 2.0, 3.0][1.0, 2.0, 3.0][1.0, 2.0, 3.0] ... [1.0, 2.0, 3.0]
However the result looks like this
[1.0, 2.0, 2.0][2.0, 2.0, 2.0] ... [2.0, 2.0, 2.0]
Why?
Here is the code for the kernel
/*
 * Writes the triplet (1.0, 2.0, 3.0) into element i of an array of packed
 * 3-float vectors, where i is the global work-item id.
 */
__kernel void fill_array(__global float *output_values)
{
    int i = get_global_id(0);
    float3 pos = (float3)(1.0, 2.0, 3.0);
    /* vstore3 writes to (p + offset * 3). Passing offset 0 with &output_values[i]
       made consecutive work-items overwrite each other's components. */
    vstore3(pos, i, output_values);
}
And here is the code to run it
#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include "glm/glm.hpp"
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define MAX_SOURCE_SIZE (0x100000)
int main(void)
{
std::vector<glm::vec3> values;
values.resize(256);
// Load the kernel source code into the array source_str
FILE *fp;
char *source_str;
size_t source_size;
fp = fopen("E:/Dev/fill_array_kernel.cl", "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
source_str = (char*)malloc(MAX_SOURCE_SIZE);
source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
fclose( fp );
// Get platform and device information
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_ALL, 1,
&device_id, &ret_num_devices);
// Create an OpenCL context
cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
// Create a command queue
cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
// Create memory buffers on the device for each vector
cl_mem output_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, values.size() * sizeof(glm::vec3), NULL, &ret);
// Create a program from the kernel source
cl_program program = clCreateProgramWithSource(context, 1,
(const char **)&source_str, (const size_t *)&source_size, &ret);
// Build the program
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
if(ret != CL_SUCCESS)
{
cl_build_status build_status;
ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &build_status, NULL);
size_t ret_val_size;
ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
char *build_log = (char*)malloc(sizeof(char)*(ret_val_size + 1));
ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
build_log[ret_val_size] = '\0';
printf("%s\n", build_log);
free(build_log);
return -1;
}
// Create the OpenCL kernel
cl_kernel kernel = clCreateKernel(program, "fill_array", &ret);
// Set the arguments of the kernel
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&output_mem);
// Execute the OpenCL kernel on the list
size_t global_item_size = values.size(); // Process the entire lists
size_t local_item_size = 64; // Process in groups of 64
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
&global_item_size, &local_item_size, 0, NULL, NULL);
// Read the memory buffer C on the device to the local variable C
ret = clEnqueueReadBuffer(command_queue, output_mem, CL_TRUE, 0, values.size() * sizeof(glm::vec3), values.data(), 0, NULL, NULL);
// Clean up
ret = clFlush(command_queue);
ret = clFinish(command_queue);
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseMemObject(output_mem);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
return 0;
}
I was misusing the vstore function.
I should have used the 2nd parameter to specify the index in the array.
https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/vstoren.html
/* Stores the triplet (1.0, 2.0, 3.0) as the gid-th packed 3-float vector of output_values. */
__kernel void fill_array(__global float *output_values)
{
    const int gid = get_global_id(0);
    const float3 triplet = (float3)(1.0, 2.0, 3.0);

    /* The second argument is the vector index; vstore3 scales it by 3 itself. */
    vstore3(triplet, gid, output_values);
}
I am a beginner at OpenCL. I tried to run a very simple kernel code, adding 1 to each value of vector. Everything runs fine, returns no error code (I checked return value after each step). The source Code :
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_mem memobj , resobj = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_platform_id platform_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
size_t work_units_per_kernels;
int input[10] = {1,2,3,4,5,6,7,8,9,10};
int output[10];
int length = 10 ;
FILE *fp;
char fileName[] = "/home/tuan/OpenCLPlayaround/hello.cl";
char *source_str;
size_t source_size;
/* Load the source code containing the kernel*/
fp = fopen(fileName, "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
source_str = (char*)malloc(0x100000);
source_size = fread(source_str,1,0x100000, fp);
fclose(fp);
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
std::cout<<ret<<" code"<<std::endl;
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
std::cout<<ret<<" code"<<std::endl;
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
std::cout<<ret<<" code"<<std::endl;
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
//Check Concept of memory
memobj = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,length * sizeof(int), input, &ret);
resobj = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length * sizeof(int), output, &ret);
std::cout<<ret<<" code"<<std::endl;
program = clCreateProgramWithSource(context,1,(const char**)&source_str, (const size_t*)&source_size, &ret);
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
kernel = clCreateKernel(program, "hello", &ret);
ret = clSetKernelArg(kernel,0, sizeof(memobj),(void *)&memobj);
ret = clSetKernelArg(kernel,1, sizeof(resobj),(void *)&resobj);
ret = clEnqueueTask(command_queue, kernel, 0, NULL,NULL);
ret = clEnqueueReadBuffer(command_queue, resobj, CL_TRUE, 0, length* sizeof(int),output, 0, NULL, NULL);
for (int i = 0 ; i <10 ; i++) {
std::cout<<output[i]<<" "<<std::endl;
}
return 0;
The result is somewhat bizarre, while it should be {2,3,4,5,6,7,8,9,10,11} :
2
-16777216
65535
1
-1242789408
32767
4201449
0
2
0
And my kernel :
/* Each work-item writes a[id] + 1 into b[id], where id is its global id. */
__kernel void hello(__global int* a, __global int* b)
{
    const int idx = get_global_id(0);
    const int base = 0;

    b[idx] = base + a[idx] + 1;
}
Can somebody explain why? I have been banging my head against this for hours!
clEnqueueTask is equivalent to calling clEnqueueNDRangeKernel with work_dim = 1, global_work_offset = NULL, global_work_size[0] set to 1, and local_work_size[0] set to 1.
so use clEnqueueNDRangeKernel.
I want to implement digital signature app using CryptVerifySignature. I've written this code(with help of MSDN example):
/* Prints `message` to stderr and jumps to the enclosing function's Exit label when `condition` is false. Wrapped in do/while(0) so `Check(...);` is a single statement, safe inside unbraced if/else. */
#define Check(condition, message) do { if (!(condition)) { fprintf(stderr, "%s\n", message); goto Exit; } } while (0)
void DigSign(const char* inputFileName, const char* signFileName)
{
HCRYPTPROV hProv;
BYTE *pbBuffer= NULL;
DWORD dwBufferLen = 0;
HCRYPTHASH hHash;
HCRYPTKEY hKey;
HCRYPTKEY hPubKey;
BYTE *pbKeyBlob;
BYTE *pbSignature;
DWORD dwSigLen;
DWORD dwBlobLen;
FILE* inputFile = NULL;
Check((inputFile = fopen(inputFileName, "r")) != NULL, "File does not exists");
dwBufferLen = GetFileSize(inputFile);
Check(pbBuffer = (BYTE*)malloc(dwBufferLen + 1), "cannot allocate memory");
fread(pbBuffer, 1, dwBufferLen, inputFile);
pbBuffer[dwBufferLen] = '\0';
fclose(inputFile);
//-------------------------------------------------------------------
// Acquire a cryptographic provider context handle.
Check(CryptAcquireContext(&hProv, NULL, NULL, PROV_RSA_FULL, 0), "Error during CryptAcquireContext.");
if(!CryptGetUserKey(hProv, AT_SIGNATURE, &hKey))
{
if(NTE_NO_KEY == GetLastError())
{
Check(CryptGenKey(hProv, AT_SIGNATURE, CRYPT_EXPORTABLE, &hKey), "Could not create a user public key.\n");
}
else
{
goto Exit;
}
}
Check(CryptExportKey(hKey, NULL, PUBLICKEYBLOB, 0, NULL, &dwBlobLen), "Error computing BLOB length.");
Check(pbKeyBlob = (BYTE*)malloc(dwBlobLen), "Out of memory. \n");
Check(CryptExportKey(hKey, NULL, PUBLICKEYBLOB, 0, pbKeyBlob, &dwBlobLen), "Error during CryptExportKey.");
//-------------------------------------------------------------------
// Create the hash object.
Check(CryptCreateHash(hProv, CALG_SHA, 0, 0, &hHash), "Error during CryptCreateHash.");
//-------------------------------------------------------------------
// Compute the cryptographic hash of the buffer.
Check(CryptHashData(hHash, pbBuffer, dwBufferLen, 0), "Error during CryptHashData.");
//-------------------------------------------------------------------
// Determine the size of the signature and allocate memory.
dwSigLen= 0;
Check(CryptSignHash(hHash, AT_SIGNATURE, NULL, 0, NULL, &dwSigLen), "Error during CryptSignHash.");
//-------------------------------------------------------------------
// Allocate memory for the signature buffer.
Check(pbSignature = (BYTE *)malloc(dwSigLen), "Out of memory.");
//-------------------------------------------------------------------
// Sign the hash object.
Check(CryptSignHash(hHash, AT_SIGNATURE, NULL, 0, pbSignature, &dwSigLen), "Error during CryptSignHash.");
FILE* f = fopen(signFileName, "w");
fwrite(pbSignature, dwSigLen, 1, f);
printf("W: %.128s\n", pbSignature);
fwrite(pbKeyBlob, dwBlobLen, 1, f);
printf("W: %.148s\n", pbKeyBlob);
fclose(f);
//-------------------------------------------------------------------
// Destroy the hash object.
if(hHash)
CryptDestroyHash(hHash);
free(pbSignature);
free(pbKeyBlob);
if(hProv)
CryptReleaseContext(hProv, 0);
Exit:;
}
bool CheckDigSign(const char* inputFileName, const char* signFileName, const char* userName)
{
bool result = false;
HCRYPTPROV hProv;
BYTE *pbBuffer= (BYTE *)"The data that is to be hashed and signed.";
DWORD dwBufferLen = strlen((char *)pbBuffer)+1;
HCRYPTHASH hHash;
HCRYPTKEY hKey;
HCRYPTKEY hPubKey;
BYTE *pbKeyBlob;
BYTE *pbSignature;
DWORD dwSigLen;
DWORD dwBlobLen;
FILE* inputFile = NULL;
Check((inputFile = fopen(inputFileName, "r")) != NULL, "File does not exists");
dwBufferLen = GetFileSize(inputFile);
Check(pbBuffer = (BYTE*)malloc(dwBufferLen + 1), "cannot allocate memory");
fread(pbBuffer, 1, dwBufferLen, inputFile);
pbBuffer[dwBufferLen] = '\0';
fclose(inputFile);
FILE* signFile = NULL;
Check((signFile = fopen(signFileName, "r")) != NULL, "File does not exists");
DWORD dwSignFileLen = GetFileSize(signFile);
dwSigLen = 128;
pbSignature = (BYTE*)malloc(dwSigLen);
dwBlobLen = dwSignFileLen - dwSigLen;
pbKeyBlob = (BYTE*)malloc(dwBlobLen);
fread(pbSignature, 1, dwSigLen, signFile);
fread(pbKeyBlob, 1, dwBlobLen, signFile);
fclose(signFile);
printf("R: %.128s\n", pbSignature);
printf("R: %.148s\n", pbKeyBlob);
Check(CryptAcquireContext(&hProv, NULL, NULL, PROV_RSA_FULL, 0), "Error during CryptAcquireContext.");
Check(CryptImportKey(hProv, pbKeyBlob, dwBlobLen, 0, 0, &hPubKey), "Public key import failed.");
//-------------------------------------------------------------------
// Create a new hash object.
Check(CryptCreateHash(hProv, CALG_SHA, 0, 0, &hHash), "Error during CryptCreateHash.");
//-------------------------------------------------------------------
// Compute the cryptographic hash of the buffer.
Check(CryptHashData(hHash, pbBuffer, dwBufferLen, 0), "Error during CryptHashData.");
//-------------------------------------------------------------------
// Validate the digital signature.
result = CryptVerifySignature(hHash, pbSignature, dwSigLen, hPubKey, NULL, 0);
printf("%u %x", GetLastError(), GetLastError());
//-------------------------------------------------------------------
// Free memory to be used to store signature.
if(pbSignature)
free(pbSignature);
//-------------------------------------------------------------------
// Destroy the hash object.
if(hHash)
CryptDestroyHash(hHash);
//-------------------------------------------------------------------
// Release the provider handle.
if(hProv)
CryptReleaseContext(hProv, 0);
Exit:
return result;
}
/* Signs test3.txt into test.sig, verifies it, and prints the verification result. */
int _tmain(int argc, _TCHAR* argv[])
{
    const char* const dataFile = "test3.txt";
    const char* const signatureFile = "test.sig";

    DigSign(dataFile, signatureFile);
    const bool verified = CheckDigSign(dataFile, signatureFile, "");
    printf("TEST: %u\n", verified);
}
The DigSign function must sign the content of a file and write the signature and the public key to another file. CheckDigSign must return true if the signature is valid. But I don't understand why my code doesn't work: CheckDigSign in _tmain should return true, but it returns false. Can anybody help me, please?
I took your entire code sample, hacked it up a little, and used CreateFile, ReadFile, and WriteFile for all the file I/O. It works. The signature file with the appended public key checked against the source file just fine.
I therefore suspect it is the method of reading/writing your files, and specifically, the "w" and "r" vs. "wb" and "rb" that is horking over your bytes. Try changing those and see what you come up with.
For reference, the modified code is below, and there is NO error checking in the changes I made, so DON'T use this for anything special as it is literally worth less than the paper its printed on (i.e. nothing).
/* Prints `message` to stderr and jumps to the enclosing function's Exit label when `condition` is false. Wrapped in do/while(0) so `Check(...);` is a single statement, safe inside unbraced if/else. */
#define Check(condition, message) do { if (!(condition)) { fprintf(stderr, "%s\n", message); goto Exit; } } while (0)
void DigSign(const char* inputFileName, const char* signFileName)
{
HCRYPTPROV hProv;
BYTE *pbBuffer= NULL;
DWORD dwBufferLen = 0;
HCRYPTHASH hHash;
HCRYPTKEY hKey;
BYTE *pbKeyBlob;
BYTE *pbSignature;
DWORD dwSigLen;
DWORD dwBlobLen;
FILE* inputFile = NULL;
HANDLE hFileInput = CreateFile(inputFileName, // file to open
GENERIC_READ, // open for reading
FILE_SHARE_READ, // share for reading
NULL, // default security
OPEN_EXISTING, // existing file only
FILE_ATTRIBUTE_NORMAL, // normal file
NULL);
dwBufferLen = GetFileSize(hFileInput, NULL);
Check(pbBuffer = (BYTE*)malloc(dwBufferLen + 1), "cannot allocate memory");
DWORD dwBytesRead = 0L;
ReadFile(hFileInput, pbBuffer, dwBufferLen, &dwBytesRead, NULL);
pbBuffer[dwBufferLen] = 0;
CloseHandle(hFileInput);
//-------------------------------------------------------------------
// Acquire a cryptographic provider context handle.
Check(CryptAcquireContext(&hProv, NULL, NULL, PROV_RSA_FULL, 0), "Error during CryptAcquireContext.");
if(!CryptGetUserKey(hProv, AT_SIGNATURE, &hKey))
{
if(NTE_NO_KEY == GetLastError())
{
Check(CryptGenKey(hProv, AT_SIGNATURE, CRYPT_EXPORTABLE, &hKey), "Could not create a user public key.\n");
}
else
{
goto Exit;
}
}
Check(CryptExportKey(hKey, NULL, PUBLICKEYBLOB, 0, NULL, &dwBlobLen), "Error computing BLOB length.");
Check(pbKeyBlob = (BYTE*)malloc(dwBlobLen), "Out of memory. \n");
Check(CryptExportKey(hKey, NULL, PUBLICKEYBLOB, 0, pbKeyBlob, &dwBlobLen), "Error during CryptExportKey.");
//-------------------------------------------------------------------
// Create the hash object.
Check(CryptCreateHash(hProv, CALG_SHA, 0, 0, &hHash), "Error during CryptCreateHash.");
//-------------------------------------------------------------------
// Compute the cryptographic hash of the buffer.
Check(CryptHashData(hHash, pbBuffer, dwBufferLen, 0), "Error during CryptHashData.");
//-------------------------------------------------------------------
// Determine the size of the signature and allocate memory.
dwSigLen= 0;
Check(CryptSignHash(hHash, AT_SIGNATURE, NULL, 0, NULL, &dwSigLen), "Error during CryptSignHash.");
//-------------------------------------------------------------------
// Allocate memory for the signature buffer.
Check(pbSignature = (BYTE *)malloc(dwSigLen), "Out of memory.");
//-------------------------------------------------------------------
// Sign the hash object.
Check(CryptSignHash(hHash, AT_SIGNATURE, NULL, 0, pbSignature, &dwSigLen), "Error during CryptSignHash.");
HANDLE hFileSign = CreateFile(signFileName, // name of the write
GENERIC_WRITE, // open for writing
0, // do not share
NULL, // default security
CREATE_NEW, // create new file only
FILE_ATTRIBUTE_NORMAL, // normal file
NULL); // no attr. template
DWORD dwBytesWritten = 0;
WriteFile(hFileSign, pbSignature, dwSigLen, &dwBytesWritten, NULL);
WriteFile(hFileSign, pbKeyBlob, dwBlobLen, &dwBytesWritten, NULL);
CloseHandle(hFileSign);
printf("W: %.128s\n", pbSignature);
printf("W: %.148s\n", pbKeyBlob);
//-------------------------------------------------------------------
// Destroy the hash object.
if(hHash)
CryptDestroyHash(hHash);
free(pbSignature);
free(pbKeyBlob);
if(hProv)
CryptReleaseContext(hProv, 0);
Exit:;
}
bool CheckDigSign(const char* inputFileName, const char* signFileName, const char* userName)
{
BOOL result = false;
HCRYPTPROV hProv;
BYTE *pbBuffer= (BYTE *)"The data that is to be hashed and signed.";
DWORD dwBufferLen = strlen((char *)pbBuffer)+1;
HCRYPTHASH hHash;
HCRYPTKEY hPubKey;
BYTE *pbKeyBlob;
BYTE *pbSignature;
DWORD dwSigLen;
DWORD dwBlobLen;
HANDLE hFileInput = CreateFile(inputFileName, // file to open
GENERIC_READ, // open for reading
FILE_SHARE_READ, // share for reading
NULL, // default security
OPEN_EXISTING, // existing file only
FILE_ATTRIBUTE_NORMAL, // normal file
NULL);
dwBufferLen = GetFileSize(hFileInput, NULL);
Check(pbBuffer = (BYTE*)malloc(dwBufferLen + 1), "cannot allocate memory");
DWORD dwBytesRead = 0;
ReadFile(hFileInput, pbBuffer, dwBufferLen, &dwBytesRead, NULL);
pbBuffer[dwBufferLen] = 0;
CloseHandle(hFileInput);
HANDLE hFileSig = CreateFile(signFileName, // file to open
GENERIC_READ, // open for reading
FILE_SHARE_READ, // share for reading
NULL, // default security
OPEN_EXISTING, // existing file only
FILE_ATTRIBUTE_NORMAL, // normal file
NULL);
DWORD dwSignFileLen = GetFileSize(hFileSig, NULL);
dwSigLen = 128;
pbSignature = (BYTE*)malloc(dwSigLen);
dwBlobLen = dwSignFileLen - dwSigLen;
pbKeyBlob = (BYTE*)malloc(dwBlobLen);
ReadFile(hFileSig, pbSignature, dwSigLen, &dwBytesRead, NULL);
ReadFile(hFileSig, pbKeyBlob, dwBlobLen, &dwBytesRead, NULL);
CloseHandle(hFileSig);
printf("R: %.128s\n", pbSignature);
printf("R: %.148s\n", pbKeyBlob);
Check(CryptAcquireContext(&hProv, NULL, NULL, PROV_RSA_FULL, 0), "Error during CryptAcquireContext.");
Check(CryptImportKey(hProv, pbKeyBlob, dwBlobLen, 0, 0, &hPubKey), "Public key import failed.");
//-------------------------------------------------------------------
// Create a new hash object.
Check(CryptCreateHash(hProv, CALG_SHA, 0, 0, &hHash), "Error during CryptCreateHash.");
//-------------------------------------------------------------------
// Compute the cryptographic hash of the buffer.
Check(CryptHashData(hHash, pbBuffer, dwBufferLen, 0), "Error during CryptHashData.");
//-------------------------------------------------------------------
// Validate the digital signature.
result = CryptVerifySignature(hHash, pbSignature, dwSigLen, hPubKey, NULL, 0);
printf("%u %x", GetLastError(), GetLastError());
//-------------------------------------------------------------------
// Free memory to be used to store signature.
if(pbSignature)
free(pbSignature);
//-------------------------------------------------------------------
// Destroy the hash object.
if(hHash)
CryptDestroyHash(hHash);
//-------------------------------------------------------------------
// Release the provider handle.
if(hProv)
CryptReleaseContext(hProv, 0);
Exit:
return !!result;
}
/*
 * Command-line driver: signs argv[1] into argv[2] with DigSign, then prints
 * the result of CheckDigSign for the same pair of files.
 *
 * Returns 0 after running the test, or 1 (with a usage message on stderr)
 * when the argument count is wrong.
 *
 * NOTE(review): argv is passed straight to functions taking `const char*`,
 * so this presumably expects a non-UNICODE (ANSI) build - confirm.
 */
int _tmain(int argc, _TCHAR* argv[])
{
    if (argc != 3)
    {
        fprintf(stderr, "Usage: <program> <input file> <signature file>\n");
        return 1;
    }

    DigSign(argv[1], argv[2]);
    printf("TEST: %u\n", CheckDigSign(argv[1], argv[2], ""));
    return 0;
}