Aloha,
I'm struggling with the OpenCL child-kernel (device-side enqueue) feature.
Kernel SRC (Minimal example):
/* Parent kernel: every work-item enqueues one child kernel from the device. */
kernel void launcher()
{
    /* Queue on the device that receives the child kernel. */
    queue_t device_queue = get_default_queue();
    /* One-dimensional range containing a single work-item. */
    ndrange_t child_range = ndrange_1D(1);
    enqueue_kernel(device_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, child_range,
        ^{
            size_t child_id = get_global_id(0);
        }
    );
}
stdafx.h:
#pragma once
#define __CL_ENABLE_EXCEPTIONS
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include "targetver.h"
#include <CL/cl2.hpp>
#include <iostream>
#include <string>
Full SRC (Minimal):
#include "stdafx.h"
// OpenCL C source of the "launcher" kernel handed to cl::Program.
// One literal per kernel line; every line ends with an explicit '\n'.
std::string kernel2_source(
    "kernel void launcher() \n"
    "{ \n"
    " ndrange_t ndrange = ndrange_1D(1);\n"
    " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,\n"
    " ^{\n"
    " size_t id = get_global_id(0);\n"
    " }\n"
    " );\n"
    "}\n");
// Number of input elements; main() uses it as the global work size of the kernel launch.
constexpr int numTriangles = 10;
cl_int errorcode = CL_BUILD_ERROR; // Pre-set to a non-success value: when build() throws, the assignment in main() never runs and the catch block reads this variable.
// Variable definitions were moved out of main for test purposes.
// Shared OpenCL state that main() fills in and its catch block inspects:
cl::Program program;
std::vector<cl::Device> devices;
std::vector<cl::Platform> platforms;
cl::CommandQueue queue;
// Program sources, built from the kernel text defined above.
cl::Program::Sources source{ kernel2_source };
int main() {
try {
// Query for platforms
cl::Platform::get(&platforms);
std::cout << "Num Platforms: " << platforms.size() << std::endl;
// Get a list of devices on this platform
platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
std::cout << "Using platform: " << platforms[0].getInfo<CL_PLATFORM_NAME>() << std::endl;
std::cout << "Num Devices: " << devices.size() << std::endl;
// Create a context for the devices
std::cout << "Using device: " << devices[0].getInfo<CL_DEVICE_NAME>() << std::endl;
//Create a context for the first device
//cl::Context context({ devices[0]});
cl::Context context({ devices[0] });
// Create a command−queue for the first device
queue = cl::CommandQueue(context, devices[0]);
cl::DeviceCommandQueue deviceQueue;
deviceQueue = cl::DeviceCommandQueue(context, devices[0]);
// Create the program from the source code
program = cl::Program(context, source);
std::cout << "Building Program" << std::endl;
// Build the program for the devices
errorcode = program.build("-cl-std=CL2.0 -g");
std::cout << "Success!" << std::endl;
cl::Kernel kernel = cl::Kernel(program, "launcher");
cl::NDRange global = numTriangles;
cl::NDRange local = 1;
queue.enqueueNDRangeKernel(kernel, cl::NullRange, global, local);
std::cout << "finished" << std::endl;
std::cin.get();
}
catch (cl::Error error)
{
std::cout << "Error!" << std::endl;
std::cout << error.what() << "(" << error.err() << ")" << std::endl;
std::cout << "Errorcode: " << errorcode << std::endl;
if (errorcode != CL_SUCCESS) { //...
std::cout << "Build Status: " << program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(devices[0]) << std::endl;
//std::cout << "Build Status: " << program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(devices[1]) << std::endl;
std::cout << "Build Options:" << program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(devices[0]) << std::endl;
//std::cout << "Build Options:" << program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(devices[1]) << std::endl;
std::cout << "Build Log:" << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]) << std::endl;
//std::cout << "Build Log:" << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[1]) << std::endl;
}
}
std::cin.get();
return 0;
}
Output:
Num Platforms: 1
Using platform: AMD Accelerated Parallel Processing
Num Devices: 2
Using device: Hawaii
Building Program
=> Exception.
An uncaught exception appears, which is strange, because every build error should be caught.
The ndrange_1D(1) is just for testing purposes (and to produce an acceptable amount of dummy output).
The device (AMD R9 390X) is OpenCL 2.0 capable.
Any ideas how to fix this?
EDIT:
Even without exceptions, using error codes instead, this still throws an exception!
Related
Given the following code:
#include <SDL2/SDL.h>
#include <SDL2/SDL_audio.h>
#include <iostream>
#include <chrono>
#include <thread>
static void SDLCallback(void *userData, Uint8 *data, int bytes) {
std::cerr << "SDLCallback: " << bytes / sizeof(float) << "\n";
}
int main() {
using namespace std::literals;
SDL_Init(SDL_INIT_AUDIO);
SDL_AudioDeviceID m_deviceId{};
SDL_AudioSpec m_desired, m_obtained;
m_desired.freq = 48000;
m_desired.format = AUDIO_F32SYS;
m_desired.channels = 2;
m_desired.samples = 1024;
m_desired.callback = SDLCallback;
m_desired.userdata = nullptr;
m_deviceId = SDL_OpenAudioDevice(nullptr, 0, &m_desired, &m_obtained, 0);
std::cerr << "SDL device: " << m_deviceId << std::endl;
if (m_deviceId < 2) {
std::cerr << "SDL: Couldn't open audio: " << SDL_GetError() << std::endl;
exit(1);
}
std::cerr << "rate: " << m_obtained.freq << "\n";
std::cerr << "samples: " << m_obtained.samples << "\n";
std::cerr << "bytes: " << m_obtained.size << "\n";
std::cerr << "channels: " << (int)m_obtained.channels << "\n";
SDL_PauseAudioDevice(m_deviceId, 0);
for (int i = 0; i < 10; i++) {
std::this_thread::sleep_for(100ms);
std::cerr << (SDL_GetAudioDeviceStatus(m_deviceId) == SDL_AUDIO_PLAYING)
<< std::endl;
}
SDL_CloseAudioDevice(m_deviceId);
SDL_Quit();
}
I am seeing the following output:
$ ./a.out
SDL device: 2
rate: 48000
samples: 1024
bytes: 8192
channels:
SDLCallback: 2048
SDLCallback: 2048
1
1
1
1
1
1
1
1
1
1
<program gets stuck on SDL_CloseAudioDevice>
That is, the SDLCallback function is called only twice at the beginning and then never again, yet the audio device is still reported as playing. I am using PulseAudio.
What can I do to prevent that? Where is my program wrong? It works if I kill PulseAudio, but PulseAudio works fine with all my other software.
And if my program is not wrong, how can I make sure that my users never encounter this issue, given that PulseAudio is very common among Linux users? How can I recover without the program getting stuck in SDL_CloseAudioDevice?
I spent two months searching the web for a proper file-locking mechanism to use in a C++ program.
I found a lot on "C and fcntl", which I could prove to work. But all the locking mechanisms that I could prove to work properly on Linux are based on file descriptors.
As this seems rather old-fashioned, and the current C++17 style of writing code with file and I/O streams does not use that mechanism, I only came up with something that works by using what was presented here:
Not able to ofstream using __gnu_cxx::stdio_filebuf
My question is: is this really the only mechanism that works, i.e. the only way to connect both worlds?
I looked in all these books to find anything about fcntl and C++, but was not successful:
[Der C++ Programmierer Cxx20]
(https://www.hanser-elibrary.com/doi/book/10.3139/9783446465510)
[The C++ Programming Language] (https://www.stroustrup.com/C++.html)
[C++ Das Umfassende Handbuch]
(https://www.rheinwerk-verlag.de/c-plusplus-das-umfassende-handbuch/)
[Modern C++ Programming Cookbook Second Edition]
(https://www.packtpub.com/product/modern-c-programming-cookbook-second-edition/9781800208988)
My question to the C++ gurus here is whether I missed something, or whether the following code is, as of early 2021, the best we can do.
Short explanation of what the code is meant to prove:
We have C++ code which adds usernames and their LSF processes to a conf file, which is read by the SSH server to allow users access to that machine. Since two or more processes running this code at the same time could attempt to add or delete users concurrently, we have to prove that proper file locking prevents conflicting updates — without using an extra "access" file, which could also be a solution.
This is some example code I tested:
#include <iostream>
#include <string>
#include <thread>
#include <chrono>
#include <fcntl.h>
#include <unistd.h>
#include <ext/stdio_filebuf.h>
using namespace std::this_thread; // for sleep_for
// Builds an fcntl() lock request of the given type that covers the whole file.
static struct flock whole_file_lock(short lock_type) {
    struct flock request{};
    request.l_type = lock_type;
    request.l_whence = SEEK_SET;
    request.l_start = 0;
    request.l_len = 0; // length 0 = from l_start up to the end of the file
    return request;
}

// Asks via F_GETLK whether a whole-file lock of lock_type could be placed on fd.
// Returns the structure filled in by fcntl(): l_type is F_UNLCK when nothing
// conflicts, otherwise it describes the conflicting lock. Aborts when fcntl() fails.
static struct flock probe_lock(int fd, short lock_type) {
    // Always start from a fresh request: F_GETLK overwrites the structure with the
    // conflicting lock's type and range, so it must not be reused for the next call.
    struct flock probe = whole_file_lock(lock_type);
    if (fcntl(fd, F_GETLK, &probe) == -1) {
        perror("fcntl");
        abort();
    }
    return probe;
}

// Prints the conflicting lock reported by F_GETLK, prefixed with intro.
static void report_lock_holder(const char* intro, const struct flock& holder) {
    std::cout << intro
              << holder.l_pid
              << ", in status"
              << ((holder.l_type == F_WRLCK) ? 'W' : 'R')
              << ", start="
              << holder.l_start
              << ", end="
              << holder.l_len
              << std::endl;
}

// Reads everything from offset 0 to the end of the stream and returns it.
// Prints the size reported by the stream, prefixed with size_label.
static std::string read_whole_file(std::istream& in, const char* size_label) {
    in.seekg(0, std::ios::end);
    const std::streamoff reported_size = in.tellg();
    in.seekg(0);
    std::string content;
    // tellg() yields -1 on failure; never feed that into reserve().
    if (reported_size > 0) {
        content.reserve(static_cast<std::size_t>(reported_size));
    }
    std::cout << size_label << (reported_size < 0 ? 0 : reported_size) << std::endl;
    content.assign(std::istreambuf_iterator<char>(in),
                   std::istreambuf_iterator<char>());
    return content;
}

// Removes the first complete line of content that equals entry.
// Returns true when a line was removed.
static bool erase_line(std::string& content, const std::string& entry) {
    const std::string line = entry + "\n";
    std::string::size_type pos = content.find(line);
    while (pos != std::string::npos) {
        // Only accept a match that starts a line; otherwise "bob" would also
        // remove the tail of "jimbob".
        if (pos == 0 || content[pos - 1] == '\n') {
            content.erase(pos, line.length());
            return true;
        }
        pos = content.find(line, pos + 1);
    }
    return false;
}

// Appends entry as its own newline-terminated line, so that erase_line() can
// find it again later.
static void append_line(std::string& content, const std::string& entry) {
    if (!content.empty() && content.back() != '\n') {
        content += '\n';
    }
    content += entry;
    content += '\n';
}

/// Interactive test for fcntl() record locking combined with C++ streams.
///
/// Asks for an entry, a mode (1 = add, 2 = delete) and a number of seconds, takes
/// a write lock on the whole of testfile.txt, waits, rewrites the file with the
/// entry added or removed, prints the result and releases the lock.
///
/// @return 1 on invalid input or when the file cannot be opened; on success the
///         process leaves through exit(0). fcntl()/ftruncate() failures abort.
int main( ) {
    // set unbuffered console output
    std::cout.setf(std::ios::unitbuf);
    const char* filename {"testfile.txt"};

    // get input from user
    std::string input_from_user_string;
    std::cout << "Please give input to change in the file: ";
    std::cin >> input_from_user_string;
    int add_1_del_2 = 0;
    std::cout << "Please give 1 if you want to add to the file or 2 if you want to delete from file: ";
    std::cin >> add_1_del_2;
    int input_from_user_time = 0;
    std::cout << "Please give seconds to wait: ";
    std::cin >> input_from_user_time;
    if (!std::cin) {
        std::cerr << "Invalid input" << std::endl;
        return 1;
    }

    // opening file
    std::cout << "Opening File" << std::endl;
    const mode_t mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH; //664
    const int fd = open(filename, O_RDWR | O_CREAT, mode);
    // printing out information about file descriptor
    std::cout << " Dexc:" << fd << std::endl;
    if (fd == -1) {
        perror("open");
        return 1;
    }

    // generating C++ streams on the file descriptor
    __gnu_cxx::stdio_filebuf<char> sourcebufin(fd, std::ios::in);
    __gnu_cxx::stdio_filebuf<char> sourcebufout(fd, std::ios::out);
    std::istream myfilein(&sourcebufin);
    std::ostream myfileout(&sourcebufout);

    // -----------
    // report existing locks (informational only; F_SETLKW below does the real waiting)
    // -----------
    std::cout << "Checking for Lock on file" << std::endl;
    const struct flock read_probe = probe_lock(fd, F_RDLCK);
    if (read_probe.l_type != F_UNLCK) {
        report_lock_holder("File is locked for reading by process ", read_probe);
    }
    else {
        std::cout << "File is unlocked for reading" << std::endl;
    }

    // check for a conflicting write lock, up to ten times
    for (int i = 1; i < 11; i++) {
        std::cout << "Checking for lock "
                  << i
                  << " of 10 times..."
                  << std::endl;
        const struct flock write_probe = probe_lock(fd, F_WRLCK);
        if (write_probe.l_type == F_UNLCK) {
            std::cout << "File is unlocked" << std::endl;
            break;
        }
        report_lock_holder("File is locked by process ", write_probe);
        sleep(10);
    }

    // -----------
    // apply lock for write on file
    // -----------
    std::cout << "Locking file for write" << std::endl;
    // Fresh whole-file request, so the lock never inherits a range from a probe.
    struct flock fl = whole_file_lock(F_WRLCK);
    if (fcntl(fd, F_SETLKW, &fl) == -1) {
        perror("fcntl");
        abort();
    }

    // -----------
    // wait some time
    // -----------
    std::cout << "Now waiting for " << input_from_user_time << " seconds, keeping the file locked..." << std::endl;
    std::this_thread::sleep_for(std::chrono::seconds(input_from_user_time));

    // -----------
    // read from file
    // -----------
    std::cout << "Reading from file... " << std::endl;
    std::string filecontent = read_whole_file(myfilein, "Length of file is: ");

    // -----------
    // print output about read data
    // -----------
    std::cout << "Length of filecontent-string: " << filecontent.size() << std::endl;
    std::cout << "Content of File begin" << std::endl;
    std::cout << "----------" << std::endl;
    std::cout << filecontent << std::endl;
    std::cout << "----------" << std::endl;

    // -----------
    // Apply changes on read in data depending on given input
    // -----------
    if (add_1_del_2 == 2) {
        std::cout << "Runmode: Del" << std::endl;
        erase_line(filecontent, input_from_user_string);
    }
    if (add_1_del_2 == 1) {
        std::cout << "Runmode: Append" << std::endl;
        append_line(filecontent, input_from_user_string);
    }
    std::cout << "Content of String after change" << std::endl;
    std::cout << "----------" << std::endl;
    std::cout << filecontent << std::endl;
    std::cout << "----------" << std::endl;

    // -----------
    // write out to file, truncate before to length of new string
    // -----------
    std::cout << "Now starting the write out..." << std::endl;
    myfilein.seekg(0);
    if (ftruncate(fd, static_cast<off_t>(filecontent.length())) == -1) {
        perror("ftruncate");
        abort();
    }
    myfileout.seekp(0);
    myfileout << filecontent;
    myfileout.flush();
    if (!myfileout) {
        std::cerr << "Writing to " << filename << " failed" << std::endl;
    }
    myfileout.clear();

    // -----------
    // read from file for a second time and printout content
    // -----------
    std::cout << "Reading from file again... " << std::endl;
    const std::string filecontent_after = read_whole_file(myfilein, "Length of file is now: ");
    std::cout << "Length of filecontent_after-string: " << filecontent_after.size() << std::endl;
    std::cout << "Content of File end" << std::endl;
    std::cout << "----------" << std::endl;
    std::cout << filecontent_after << std::endl;
    std::cout << "----------" << std::endl;

    // -----------
    // unlocking file and close file
    // -----------
    std::cout << "Unlocking..." << std::endl;
    fl = whole_file_lock(F_UNLCK);
    if (fcntl(fd, F_SETLK, &fl) == -1) {
        perror("fcntl");
        abort();
    }
    close(fd);

    // -----------
    // done
    // -----------
    std::cout << "done" << std::endl;
    // NOTE(review): exit() is kept on purpose. libstdc++ documents that a
    // stdio_filebuf built from a descriptor closes it when destroyed; exit() skips
    // the destructors of the two local buffers, so fd is closed exactly once above.
    exit(0);
}
I ask for your comments on this or perhaps how to improve.
Alexander Bruns
I'm encountering unexpected performance with my OpenCL code (more precisely, I use boost::compute 1.67.0). For now, I just want to add the elements of two buffers: c[i] = a[i] + b[i].
I noticed a slowdown compared to an existing SIMD implementation, so I isolated each step to highlight which one is time-consuming. Here is my code sample:
Chrono chrono2;
chrono2.start();
Chrono chrono;
ipReal64 elapsed;
// creating the OpenCL context and other stuff
// ...
std::string kernel_src = BOOST_COMPUTE_STRINGIZE_SOURCE(
__kernel void add_knl(__global const uchar* in1, __global const uchar* in2, __global uchar* out)
{
size_t idx = get_global_id(0);
out[idx] = in1[idx] + in2[idx];
}
);
boost::compute::program* program = new boost::compute::program;
try {
chrono.start();
*program = boost::compute::program::create_with_source(kernel_src, context);
elapsed = chrono.elapsed();
std::cout << "Create program : " << elapsed << "s" << std::endl;
chrono.start();
program->build();
elapsed = chrono.elapsed();
std::cout << "Build program : " << elapsed << "s" << std::endl;
}
catch (boost::compute::opencl_error& e) {
std::cout << "Error building program : " << std::endl << program->build_log() << std::endl << e.what() << std::endl;
return;
}
boost::compute::kernel* kernel = new boost::compute::kernel;
try {
chrono.start();
*kernel = program->create_kernel("add_knl");
elapsed = chrono.elapsed();
std::cout << "Create kernel : " << elapsed << "s" << std::endl;
}
catch (const boost::compute::opencl_error& e) {
std::cout << "Error creating kernel : " << std::endl << e.what() << std::endl;
return;
}
try {
chrono.start();
// Pass the argument to the kernel
kernel->set_arg(0, bufIn1);
kernel->set_arg(1, bufIn2);
kernel->set_arg(2, bufOut);
elapsed = chrono.elapsed();
std::cout << "Set args : " << elapsed << "s" << std::endl;
}
catch (const boost::compute::opencl_error& e) {
std::cout << "Error setting kernel arguments: " << std::endl << e.what() << std::endl;
return;
}
try {
chrono.start();
queue.enqueue_1d_range_kernel(*kernel, 0, sizeX*sizeY, 0);
elapsed = chrono.elapsed();
std::cout << "Kernel calculation : " << elapsed << "s" << std::endl;
}
catch (const boost::compute::opencl_error& e) {
std::cout << "Error executing kernel : " << std::endl << e.what() << std::endl;
return;
}
std::cout << "[Function] Full duration " << chrono2.elapsed() << std::endl;
chrono.start();
delete program;
elapsed = chrono.elapsed();
std::cout << "Delete program : " << elapsed << "s" << std::endl;
delete kernel;
elapsed = chrono.elapsed();
std::cout << "Delete kernel : " << elapsed << "s" << std::endl;
And here is a sample result (I run my program on an NVIDIA GeForce GT 630, with the NVIDIA SDK Toolkit):
Create program : 0.0013123s
Build program : 0.0015421s
Create kernel : 6.6e-06s
Set args : 1.7e-06s
Kernel calculation : 0.0001639s
[Function] Full duration : 0.0077794
Delete program : 4.1e-06s
Delete kernel : 0.0879901s
I know my program is simple, and I don't expect the kernel execution to be the most time-consuming step. However, I thought the kernel deletion would take only a few ms, like creating or building the program.
Is this normal behaviour?
Thanks
I'll point out that I've never used boost::compute, but it looks like it's a fairly thin wrapper over OpenCL, so the following should be correct:
Enqueueing the kernel does not wait for it to complete. The enqueue function returns an event, which you can then wait for, or you can wait for all tasks enqueued onto the queue to complete. You are timing neither of those things. What is likely happening is that when you destroy your kernel, it waits for all queued instances which are still pending to complete before returning from the destructor.
I'm on Ubuntu 18.04 and try to read AT commands from a modem. I need to do some changes to the serial interface to properly communicate these commands. However, if I try setting the baudrate, it stays the same. I use the following code:
#include <iostream>
#include <exception>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <termios.h>
#include <string.h>
#define DEVICE "/dev/ttyUSB2"
using namespace std;
/// Error raised when the serial device cannot be queried or configured.
///
/// Names are qualified with std:: so the class does not depend on a
/// using-directive being in effect.
class ATException : public std::exception {
public:
    /// @param msg description that what() returns.
    ATException(std::string msg) : msg(msg) { }

    /// @return the message passed to the constructor; valid while this object lives.
    const char* what() const noexcept override {
        return msg.c_str();
    }

private:
    std::string msg;
};
/// Configures the terminal behind fd for 8 data bits, receiver enabled, modem
/// control lines ignored, and the given line speeds.
///
/// @param fd     open terminal file descriptor.
/// @param ispeed input speed constant (B9600, B115200, ...).
/// @param ospeed output speed constant.
/// @throws ATException when the attributes cannot be read, a speed constant is
///         rejected, the attributes cannot be written, or the device reports
///         different speeds afterwards.
void set_speeds(int fd, speed_t ispeed, speed_t ospeed) {
    struct termios term_args;
    if (tcgetattr(fd, &term_args) != 0) throw ATException ("unable to tcgetattr device");

    // Replaces all control flags; the speed setters below then fill in the baud bits.
    term_args.c_cflag = CS8 | CREAD | CLOCAL;
    if (cfsetispeed(&term_args, ispeed) != 0) throw ATException("error setting i speed");
    if (cfsetospeed(&term_args, ospeed) != 0) throw ATException("error setting o speed");
    if (tcsetattr(fd, TCSANOW, &term_args) != 0) throw ATException("unable to set device attr");

    // tcsetattr() reports success as soon as *any* of the requested changes could be
    // carried out, so read the attributes back and confirm the speeds really took.
    struct termios applied;
    if (tcgetattr(fd, &applied) != 0) throw ATException ("unable to tcgetattr device");
    // An input speed of B0 means "same as the output speed", so it cannot be
    // compared literally.
    const bool input_applied = (ispeed == B0) || (cfgetispeed(&applied) == ispeed);
    const bool output_applied = (cfgetospeed(&applied) == ospeed);
    if (!input_applied || !output_applied) {
        throw ATException("requested speed was not applied to device");
    }
}
/// Reads the current input and output line speeds of the terminal behind fd.
///
/// @param fd     open terminal file descriptor.
/// @param ispeed receives the input speed constant.
/// @param ospeed receives the output speed constant.
/// @throws ATException when the terminal attributes cannot be read.
void get_speeds(int fd, speed_t& ispeed, speed_t& ospeed) {
    struct termios current;
    memset(&current, 0, sizeof current);

    if (tcgetattr(fd, &current) != 0) {
        throw ATException ("unable to tcgetattr device");
    }

    ispeed = cfgetispeed(&current);
    ospeed = cfgetospeed(&current);
}
int main (int argc, char** argv) {
speed_t ispeed;
speed_t ospeed;
struct termios term_args;
int device_fd = open(DEVICE, O_RDWR);
if (device_fd == -1) {
cerr << "unable to open device " << DEVICE << endl;
exit(1);
}
try {
cout << " B9600 speed: " << B9600 << endl;
cout << " B38400 speed: " << B38400 << endl;
cout << "B115200 speed: " << B115200 << endl;
cout << "B230400 speed: " << B230400 << endl << endl;
get_speeds(device_fd, ispeed, ospeed);
cout << "current i speed: " << ispeed << endl;
cout << "current o speed: " << ospeed << endl;
speed_t test_speeds [] = {B38400, B115200, B230400};
for (int i = 0; i < (sizeof(test_speeds) / sizeof(speed_t)); i++) {
cout << "setting speeds to " << test_speeds[i] << endl;
set_speeds(device_fd, test_speeds[i], test_speeds[i]);
get_speeds(device_fd, ispeed, ospeed);
cout << "current i speed: " << ispeed << endl;
cout << "current o speed: " << ospeed << endl;
}
int chars_written;
// writing/reading
chars_written = write(device_fd, "AT\r", 3);
cout << "written " << chars_written << " chars" << endl;
char read_buf [16];
int chars_read = read(device_fd, read_buf, sizeof(read_buf));
cout << "read (" << chars_read << "): " << read_buf << endl;
usleep(50000);
chars_read = read(device_fd, read_buf, sizeof(read_buf));
cout << "read (" << chars_read << "): " << read_buf << endl;
} catch (ATException& e) {
cerr << "ERROR: " << e.what() << DEVICE << endl;
}
close(device_fd);
}
If I execute this, I get the following output:
B9600 speed: 13
B38400 speed: 15
B115200 speed: 4098
B230400 speed: 4099
current i speed: 13
current o speed: 13
setting speeds to 15
current i speed: 13
current o speed: 13
setting speeds to 4098
current i speed: 13
current o speed: 13
setting speeds to 4099
current i speed: 13
current o speed: 13
written 3 chars
...
It seems like the baudrate hasn't changed. Why is this? Did I miss something? Thanks for help. Let me know if there's something which needs to be clarified, please!
If you consider down rating my question, please also leave a comment why, so I can improve my question or learn for the future. Thanks.
The problem: AVFormatContext::nb_streams has too large a value.
My C++ code (error handling, includes, etc. omitted to shorten the listing):
/// Prints duration, stream count, format names and bit rate of a format context.
///
/// @param _a context to describe; a null pointer is reported instead of dereferenced.
void printA(AVFormatContext* _a)
{
    if (_a == nullptr)
    {
        std::cout << "no format context\n";
        return;
    }

    // Missing format information or a null name must not reach operator<<.
    const char* format_name = "(unknown)";
    const char* format_long_name = "(unknown)";
    if (_a->iformat != nullptr)
    {
        if (_a->iformat->name != nullptr) format_name = _a->iformat->name;
        if (_a->iformat->long_name != nullptr) format_long_name = _a->iformat->long_name;
    }

    // duration is a 64-bit value; casting it to unsigned long cut it to 32 bits on
    // 32-bit targets, so print it at full width.
    std::cout << "duration " << static_cast<long long>(_a->duration) << "\n";
    std::cout << "streams " << _a->nb_streams << "\n";
    std::cout << "format name " << format_name << "\n";
    std::cout << "bit_rate " << _a->bit_rate << "\n";
    std::cout << "long name " << format_long_name << "\n";
}
int main(int argc, char **argv)
{
if ( argc < 2 )
{
std::cout << "Usage: " << argv[0] << " <file>\n";
return 1;
}
av_register_all();
AVFormatContext *pFormatCtx = avformat_alloc_context();
avformat_open_input (&pFormatCtx, argv[1], NULL, NULL);
avformat_find_stream_info(pFormatCtx, NULL);
printA( pFormatCtx );
return 0;
}
Running:
xx#xx /tmp/avlib $ ./avlibtest /ar/video/Nauka.Sna.2006.HDRip.AVC.mkv
[matroska,webm # 0x804c040] max_analyze_duration reached
[matroska,webm # 0x804c040] Estimating duration from bitrate, this may be inaccurate
duration 134741408
streams 134531840 <---- !!! :-O
format name matroska,webm
bit_rate 0
long name Matroska/WebM file format
But the "avplay" program works well.