Calculating %CPU Usage Using Proc Files - c++

I would like to dedicate a thread in my program to gathering metrics on its performance. Memory usage, CPU etc. I've been trying to do this using the /proc/stat and /proc/pid/stat files. I'm currently stuck at trying to measure the %CPU usage. The values reported by my program are totally out of line with what 'top' is reporting. I'm tried this on a few different linux distros and am seeing the same results on each.
Here is the code I am using to calculate the percentage. Can anyone spot any issues here?
https://github.com/mmcilroy/cpu_usage
#include <stdlib.h>
#include <sys/types.h>
#include <sys/times.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
struct pstat {
long unsigned int utime_ticks;
long int cutime_ticks;
long unsigned int stime_ticks;
long int cstime_ticks;
long unsigned int vsize; // virtual memory size in bytes
long unsigned int rss; //Resident Set Size in bytes
long unsigned int cpu_total_time;
};
int get_usage(const pid_t pid, struct pstat* result) {
//convert pid to string
char pid_s[20];
snprintf(pid_s, sizeof(pid_s), "%d", pid);
char stat_filepath[30] = "/proc/"; strncat(stat_filepath, pid_s,
sizeof(stat_filepath) - strlen(stat_filepath) -1);
strncat(stat_filepath, "/stat", sizeof(stat_filepath) -
strlen(stat_filepath) -1);
FILE *fpstat = fopen(stat_filepath, "r");
if (fpstat == NULL) {
perror("FOPEN ERROR ");
return -1;
}
FILE *fstat = fopen("/proc/stat", "r");
if (fstat == NULL) {
perror("FOPEN ERROR ");
fclose(fstat);
return -1;
}
//read values from /proc/pid/stat
bzero(result, sizeof(struct pstat));
long int rss;
if (fscanf(fpstat, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu"
"%lu %ld %ld %*d %*d %*d %*d %*u %lu %ld",
&result->utime_ticks, &result->stime_ticks,
&result->cutime_ticks, &result->cstime_ticks, &result->vsize,
&rss) == EOF) {
fclose(fpstat);
return -1;
}
fclose(fpstat);
result->rss = rss * getpagesize();
//read+calc cpu total time from /proc/stat
long unsigned int cpu_time[10];
bzero(cpu_time, sizeof(cpu_time));
if (fscanf(fstat, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
&cpu_time[0], &cpu_time[1], &cpu_time[2], &cpu_time[3],
&cpu_time[4], &cpu_time[5], &cpu_time[6], &cpu_time[7],
&cpu_time[8], &cpu_time[9]) == EOF) {
fclose(fstat);
return -1;
}
fclose(fstat);
for(int i=0; i < 4;i++)
result->cpu_total_time += cpu_time[i];
printf( "usage: cpu %lu, utime %lu, stime %lu\n", result->cpu_total_time, result->utime_ticks, result->stime_ticks );
return 0;
}
void calc_cpu_usage_pct(const struct pstat* cur_usage,
const struct pstat* last_usage,
double* usage)
{
printf( "delta: cpu %lu, utime %lu, stime %lu\n",
cur_usage->cpu_total_time - last_usage->cpu_total_time,
cur_usage->utime_ticks - last_usage->utime_ticks,
cur_usage->stime_ticks - last_usage->stime_ticks );
const long unsigned int cpu_diff = cur_usage->cpu_total_time - last_usage->cpu_total_time;
const long unsigned int pid_diff =
( cur_usage->utime_ticks + cur_usage->utime_ticks + cur_usage->stime_ticks - cur_usage->stime_ticks ) -
( last_usage->utime_ticks + last_usage->utime_ticks + last_usage->stime_ticks - last_usage->stime_ticks );
*usage = 100.0 * ( (double)pid_diff / (double)cpu_diff );
}
int main( int argc, char* argv[] )
{
pstat prev, curr;
double pct;
struct tms t;
times( &t );
if( argc <= 1 ) {
printf( "please supply a pid\n" ); return 1;
}
while( 1 )
{
if( get_usage(atoi(argv[1]), &prev) == -1 ) {
printf( "error\n" );
}
sleep( 5 );
if( get_usage(atoi(argv[1]), &curr) == -1 ) {
printf( "error\n" );
}
calc_cpu_usage_pct(&curr, &prev, &pct);
printf("%%cpu: %.02f\n", pct);
}
}
If you want to try it out for yourself, the program expect 1 arguments - the pid of a process to monitor

I know this is a bit old but I can explain why your new equation works: (1/INTERVAL) * (pid diff)
It's just a simplification of the basic percentage equation 100 * (pid diff) / (cpu diff), which looks like what you were trying to do in your first example.
The cpu time in /proc/stat (and the utime and stime in /proc/pid/stat) is reported in USER_HZ (or jiffies). This value is usually 1/100 of a second. This means that there will be 100 "tics" in each second for the CPU, which means your "CPU diff" will be INTERVAL*100.
Substitute that in and you get:
100 * (pid diff) / (INTERVAL * 100)
Cancel out the 100's and you are left with:
(pid diff) / INTERVAL
Which is the same as what you are now using. This also means that if you did indeed correct the problems you have in the code at the top, then that should work as well. The pid diff should be (curr utime + curr stime) - (prev utime + prev stime). If it doesn't work, then perhaps the way you are adding up the CPU time is wrong? It'd be easy to test because you know what value it should be (INTERVAL*100).
Since you now have a working equation, you may not care to figure out the problem with the original code but keep in mind that if you ever try to use it on a system where USER_HZ is not 1/100, the equation will be invalid.

I examined the source for top (from procps). Seems it is essentially performing the following calculation...
(1/interval) * (utime+stime)
Where interval it the number of seconds between samples. utime / stime are read directly from /proc/pid/stat
I must admit I don't understand why this works (it shouldn't according to "man proc"), but I've tested this with numerous different scenarios and the output from my program always matches that of "top".
Would be interested to hear some feedback on why this works :)
Here's my latest source
#include <stdlib.h>
#include <sys/types.h>
#include <sys/times.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#define INTERVAL 3
struct pstat {
long unsigned int utime_ticks;
long int cutime_ticks;
long unsigned int stime_ticks;
long int cstime_ticks;
long unsigned int vsize; // virtual memory size in bytes
long unsigned int rss; //Resident Set Size in bytes
};
int get_usage(const pid_t pid, struct pstat* result) {
//convert pid to string
char pid_s[20];
snprintf(pid_s, sizeof(pid_s), "%d", pid);
char stat_filepath[30] = "/proc/"; strncat(stat_filepath, pid_s,
sizeof(stat_filepath) - strlen(stat_filepath) -1);
strncat(stat_filepath, "/stat", sizeof(stat_filepath) -
strlen(stat_filepath) -1);
FILE *fpstat = fopen(stat_filepath, "r");
if (fpstat == NULL) {
perror("FOPEN ERROR ");
return -1;
}
//read values from /proc/pid/stat
bzero(result, sizeof(struct pstat));
long int rss;
if (fscanf(fpstat, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu"
"%lu %ld %ld %*d %*d %*d %*d %*u %lu %ld",
&result->utime_ticks, &result->stime_ticks,
&result->cutime_ticks, &result->cstime_ticks, &result->vsize,
&rss) == EOF) {
fclose(fpstat);
return -1;
}
fclose(fpstat);
result->rss = rss * getpagesize();
return 0;
}
void calc_cpu_usage_pct(const struct pstat* cur_usage,
const struct pstat* last_usage,
double* usage)
{
const long unsigned int pid_diff =
( cur_usage->utime_ticks + cur_usage->stime_ticks ) -
( last_usage->utime_ticks + last_usage->stime_ticks );
printf( "delta %lu\n", pid_diff );
*usage = 1/(float)INTERVAL * pid_diff;
}
int main( int argc, char* argv[] )
{
pstat prev, curr;
double pct;
struct tms t;
times( &t );
if( argc <= 1 ) {
printf( "please supply a pid\n" ); return 1;
}
while( 1 )
{
if( get_usage(atoi(argv[1]), &prev) == -1 ) {
printf( "error\n" );
}
sleep( INTERVAL );
if( get_usage(atoi(argv[1]), &curr) == -1 ) {
printf( "error\n" );
}
calc_cpu_usage_pct(&curr, &prev, &pct);
printf("%%cpu: %.02f\n", pct);
}
}

This command in linux may be useful for linux.
# apt-get install sysstat
# up2date sysstat
# mpstat
Now you find how to find how to get command line output as string and parse. You can also use different parameters of mpstat.
Also try $ top.
Get help from this link.

The main loop is somewhat off: instead of getting "prev", then sleeping, then getting "next" and calculating the difference, you should get "prev" once outside the loop, and inside the loop get "curr", calculate, the copy "curr" into "prev" and then loop again. This fixes the part where 50% of the used time is not counted.

try seeing the top command source code , source code will be available in busybox
EDIT:
replace mpstat with top as mpstat shows overall usage

Related

Get process creation date-time in OSX with C / C++

Given a process id, what is the best way to find the process's creation date-time using C/C++ ?
I'd suggest looking at the top and ps source code (in particular, libtop.c).
I think the following call should be what you need:
int proc_pidbsdinfo(proc_t p, struct proc_bsdinfo *pbsd, int zombie);
From <sys/proc_info.h>:
struct proc_bsdinfo {
...
struct timeval pbi_start;
...
}
Unfortunately there is no public interface for process inspection so the calls are not only version-dependant but also likely to change in future releases.
Here is a simple utility demonstrating how to do this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/errno.h>
#include <sys/sysctl.h>
int main(int argc, char **argv) {
// Check arguments
if (argc != 2) {
fprintf(stderr,"usage: %s PID\n", argv[0]);
return 1;
}
// Parse and validate PID argument
errno = 0;
char *end = NULL;
long pid = strtol(argv[1], &end, 10);
if (errno != 0 || end == argv[1] || end == NULL || *end != 0 || ((pid_t)pid) != pid) {
fprintf(stderr,"%s: bad PID argument: %s\n", argv[0], argv[1]);
return 1;
}
// Get process info from kernel
struct kinfo_proc info;
int mib[] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, (int)pid };
size_t len = sizeof info;
memset(&info,0,len);
int rc = sysctl(mib, (sizeof(mib)/sizeof(int)), &info, &len, NULL, 0);
if (rc != 0) {
fprintf(stderr,"%s: sysctl failed with errno=%d\n", argv[0], errno);
return 1;
}
// Extract start time and confirm process exists
struct timeval tv = info.kp_proc.p_starttime;
if (tv.tv_sec == 0) {
fprintf(stderr,"%s: no such PID %d\n", argv[0], (int)pid);
return 1;
}
// Convert to string - no \n because ctime() result is already \n-terminated
printf("PID %d started at %s", (int)pid, ctime(&(tv.tv_sec)));
return 0;
}
Compile it: clang -o procstart procstart.c
Example of running it:
$ ./procstart $$
PID 37675 started at Sat Apr 9 20:01:55 2022
Note the sysctl API this is calling is undocumented and as such (at least in theory) subject to change at any time. In practice however, it has been rather stable across releases, so while nobody can predict what Apple might do in the future, I think the risk of them breaking this is low.

Unable to invoke time critical function in ++ using timeGetTime() in VC++

I want to send data through socket. I want my send code to be run automatically after every 10ms, so I used a logic using timeGetTime(). But it is not getting invoked, please help me. The program runs upto now time printing code but it is not entering the if condition!
#include<stdio.h>
#include<conio.h>
#include<iostream>
#include<string.h>
#include<winsock2.h>
#include<windows.h>
#include<Windows.h>
#include<fcntl.h>
#include<errno.h>
#include<time.h>
#pragma comment(lib,"ws2_32.lib")
#pragma comment(lib,"winmm.lib")
#define BUFF_SIZE 10485760
int sock_fd = 0;
char *a;
int nread = 0;
int c = 0;
int k = 0;
int nCount = 0;
int main()
{
int ret=0, j=0;
a = (char*)calloc(BUFF_SIZE, sizeof(char));
WSAData version; //We need to check the version.
WORD mkword=MAKEWORD(2,2);
int what=WSAStartup(mkword,&version);
if(what!=0)
{
std::cout<<"This version is not supported! - \n"<<WSAGetLastError()<<std::endl;
}
else
{
std::cout<<"Initialised.\n"<<std::endl;
}
sock_fd =socket(AF_INET,SOCK_STREAM,IPPROTO_TCP);
if(sock_fd==INVALID_SOCKET)
{
std::cout<<"Creating socket fail\n";
}
else
{
std::cout<<"It was okay to create the socket\n";
}
//Socket address information
sockaddr_in addr;
addr.sin_family=AF_INET;
addr.sin_addr.s_addr=inet_addr("10.0.0.51");
addr.sin_port=htons(5001);
int conn=connect(sock_fd,(SOCKADDR*)&addr,sizeof(addr));
if(conn==SOCKET_ERROR){
std::cout<<"Error - when connecting "<<WSAGetLastError()<<std::endl;
}
else
{
std::cout<<"socket connect succesfully";
}
FILE*fp = fopen("binary.bin","wb+");
printf("\n File opened successfully through fopen()\n");
//filling our dynamic memory pointed by a with some dynamic data
for(k=0;k<BUFF_SIZE;k++)
{
a[k]=k;
}
/* writing in file*/
if (( c = fwrite(a, 1, BUFF_SIZE, fp)) != BUFF_SIZE)
{
printf ("\n Error : file write failure");
_getch();
return 1;
}
printf("\n write bytes=%d\n",c);
/* First read file in chunks of 10 bytes */
fseek(fp, 0, SEEK_SET);
int nread = fread(a, 1, BUFF_SIZE,fp);
printf(" read Bytes %d \n", nread);
unsigned int last_call_time = timeGetTime();
printf("last call time %d\n", last_call_time);
while(true)
{
unsigned int now_time = timeGetTime();
printf("now time %d\n", now_time);
if(now_time > (last_call_time + 10))
{
//call_time_critical_function();
printf("performing time critical logic\n");
for(j=0;j<BUFF_SIZE;j++){
ret = send(sock_fd, a, 1,0) ;
printf("ret %d\n", ret);
last_call_time = timeGetTime();//last time is re initialized
}
}
getch();
return 0;
}
}
Debugging output:
Initialised.
It was okay to create the socket
Error - when connecting 10060
File opened successfully through fopen
write bytes = 10
read bytes =10
last call time 4179968
now time 4179968
Print time and check.
Edit:
Move return outside while loop. Also move getch() outside the while loop.

different results with printf and fprintf

I need function that prints "word=n" (where n in [0..10]) to stream using linux function ssize_t write(int fd, const void *buf, size_t count);. Trying to use fprintf, but it's give strange results : program prints in ~1% of calls "woword=n", and length for example "woword=7" are 7. Printf print all right. I'm doing something wrong or this is the bag ?
if ((id_result = open( out , O_WRONLY)) <= 0) {
fprintf(stderr, "%s : %s\n", currentDateTime().c_str(), "could not open output\0");
ret = P_STREAMS_LOAD_ERROR;
}
void printProbability( int probability ){
char buf[50];
memset( buf, '\0', 50 );
int length = sprintf( buf, "word=%i\n\0", probability );
fprintf(stderr, "debug : word=%i len = %i\n\0", probability, length );
int result = write( id_result, buf, length );
if( result == -1){
fprintf(stderr, "%s : %s\n", currentDateTime().c_str(), "error \n");
}
}
EDITED:
how I understand, we have 2 theorys :
1) mixing printf and write
2) using '\0' and '\n' in fprintf
int length = sprintf( buf, "word=%i", probability );
int result = write( id_result, buf, length );
write( id_result, "\n", 1 );
with this code I still have same errors
aa help me :))
If you are interspersing calls to printf (or write) and fprintf(stderr, ...) the output won't necessarily come out in order. There is buffering going on, and the actual output probably won't switch at the end-of-line character.

Why is MD5Sum so fast

I've been studying hashing in C/C++ and tried to replicate the md5sum command in Linux. After analysing the source code, it seems that md5sum relies on the md5 library's md5_stream. I've approximated the md5_stream function from the md5.h library into the code below, and it runs in ~13-14 seconds. I've tried to call the md5_stream function directly and got ~13-14 seconds. The md5sum runs in 4 seconds. What have the GNU people done to get the speed out of the code?
The md5.h/md5.c code is available in the CoreUtils source code.
#include <QtCore/QCoreApplication>
#include <QtCore/QDebug>
#include <iostream>
#include <iomanip>
#include <fstream>
#include "md5.h"
#define BLOCKSIZE 32784
int main()
{
FILE *fpinput, *fpoutput;
if ((fpinput = fopen("/dev/sdb", "rb")) == 0) {
throw std::runtime_error("input file doesn't exist");
}
struct md5_ctx ctx;
size_t sum;
char *buffer = (char*)malloc (BLOCKSIZE + 72);
unsigned char *resblock = (unsigned char*)malloc (16);
if (!buffer)
return 1;
md5_init_ctx (&ctx);
size_t n;
sum = 0;
while (!ferror(fpinput) && !feof(fpinput)) {
n = fread (buffer + sum, 1, BLOCKSIZE - sum, fpinput);
if (n == 0){
break;
}
sum += n;
if (sum == BLOCKSIZE) {
md5_process_block (buffer, BLOCKSIZE, &ctx);
sum = 0;
}
}
if (n == 0 && ferror (fpinput)) {
free (buffer);
return 1;
}
/* Process any remaining bytes. */
if (sum > 0){
md5_process_bytes (buffer, sum, &ctx);
}
/* Construct result in desired memory. */
md5_finish_ctx (&ctx, resblock);
free (buffer);
for (int x = 0; x < 16; ++x){
std::cout << std::setfill('0') << std::setw(2) << std::hex << static_cast<uint16_t>(resblock[x]);
std::cout << " ";
}
std::cout << std::endl;
free(resblock);
return 0;
}
EDIT: Was a default mkspec problem in Fedora 19 64-bit.
fread() is convenient, but don't use fread() if you care about performance. fread() will copy from the OS to a libc buffer, then to your buffer. This extra copying cost CPU cycles and cache.
For better performance use open() then read() to avoid the extra copy. Make sure your read() calls are multiples of the block size, but lower than your CPU cache size.
For best performance use mmap() map the disk directly to RAM.
If you try something like the below code, it should go faster.
// compile gcc mmap_md5.c -lgcrypt
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <gcrypt.h>
#include <linux/fs.h> // ioctl
#define handle_error(msg) \
do { perror(msg); exit(EXIT_FAILURE); } while (0)
int main(int argc, char *argv[])
{
char *addr;
int fd;
struct stat sb;
off_t offset, pa_offset;
size_t length;
ssize_t s;
unsigned char digest[16];
char digest_ascii[32+1] = {0,};
int digest_length = gcry_md_get_algo_dlen (GCRY_MD_MD5);
int i;
if (argc < 3 || argc > 4) {
fprintf(stderr, "%s file offset [length]\n", argv[0]);
exit(EXIT_FAILURE);
}
fd = open(argv[1], O_RDONLY);
if (fd == -1)
handle_error("open");
if (fstat(fd, &sb) == -1) /* To obtain file size */
handle_error("fstat");
offset = atoi(argv[2]);
pa_offset = offset & ~(sysconf(_SC_PAGE_SIZE) - 1);
if (sb.st_mode | S_IFBLK ) {
// block device. use ioctl to find length
ioctl(fd, BLKGETSIZE64, &length);
} else {
/* offset for mmap() must be page aligned */
if (offset >= sb.st_size) {
fprintf(stderr, "offset is past end of file size=%zd, offset=%d\n", sb.st_size, (int) offset);
exit(EXIT_FAILURE);
}
if (argc == 4) {
length = atoi(argv[3]);
if (offset + length > sb.st_size)
length = sb.st_size - offset;
/* Canaqt display bytes past end of file */
} else { /* No length arg ==> display to end of file */
length = sb.st_size - offset;
}
}
printf("length= %zd\n", length);
addr = mmap(NULL, length + offset - pa_offset, PROT_READ,
MAP_PRIVATE, fd, pa_offset);
if (addr == MAP_FAILED)
handle_error("mmap");
gcry_md_hash_buffer(GCRY_MD_MD5, digest, addr + offset - pa_offset, length);
for (i=0; i < digest_length; i++) {
sprintf(digest_ascii+(i*2), "%02x", digest[i]);
}
printf("hash=%s\n", digest_ascii);
exit(EXIT_SUCCESS);
}
It turned out to be an error in the Qt mkspecs regarding an optimization flag not being set properly.

Output of cuda program is not what was expected

#include<cuda_runtime.h>
#include<stdio.h>
#include<cuda.h>
#include<stdlib.h>
__global__ void setVal(char **c){
c[(blockIdx.y * gridDim.x) + blockIdx.x] = "hello\0";
}
int main(){
char **gpu = NULL;
cudaMalloc((void**)&gpu, 6 * sizeof(char *));
int i;
/*
I cannot access second level directly
for( i =0 ; i < 6 ;i++){
cudaMalloc((void**)&gpu[i], 10 * sizeof(char));
}*/
dim3 grid(3,2);
setVal<<<grid, 1>>>(gpu);
char *p = (char*)malloc(10 * sizeof(char));
char *x[6];
cudaMemcpy(x, gpu, 6*sizeof(char*), cudaMemcpyDeviceToHost);
for( i =0 ; i< 6; i++){
cudaMemcpy(p, x[i], 10*sizeof(char), cudaMemcpyDeviceToHost);
//put synchronize here if problem
printf("%s\n",p);
}
getchar();
return 0;
}
Based on all the suggestions, i revised my code to make my concept correct. But, the code is still not working :(. Any help will be appreciated
Try this -- I tested it on a GTX 285 under CUDA 3.2 -- so it's a bit more restrictive than the current version, but it works.
#include<stdio.h>
#include<string.h>
__global__ void setValues(char** word)
{
volatile char* myWord = word[blockIdx.x];
myWord[0] = 'H';
myWord[1] = 'o';
myWord[2] = 'l';
myWord[3] = 'a';
myWord[4] = '\0';
}
int main()
{
const size_t bufferSize = 32;
const int nObjects = 10;
char* h_x[nObjects];
char** d_x = 0;
cudaMalloc( (void**)(&d_x), nObjects * sizeof(char*) );
for ( int i=0; i < nObjects; i++ )
{
h_x[i] = NULL;
cudaMalloc( (void**)(&h_x[i]), bufferSize * sizeof(char) );
printf("h_x[%d] = %lx\n",i,(unsigned long)h_x[i]);
}
cudaMemcpy( d_x, h_x, nObjects*sizeof(char*), cudaMemcpyHostToDevice);
printf("Copied h_x[] to d_x[]\n");
char msg[] = "Hello World!";
cudaMemcpy( h_x[0], msg, 13*sizeof(char), cudaMemcpyHostToDevice );
/* Force Thread Synchronization */
cudaError err = cudaThreadSynchronize();
/* Check for and display Error */
if ( cudaSuccess != err )
{
fprintf( stderr, "Cuda error in file '%s' in line %i : %s.\n",
__FILE__, __LINE__, cudaGetErrorString( err) );
}
setValues<<<nObjects,1>>>(d_x);
/* Force Thread Synchronization */
err = cudaThreadSynchronize();
/* Check for and display Error */
if ( cudaSuccess != err )
{
fprintf( stderr, "Cuda error in file '%s' in line %i : %s.\n",
__FILE__, __LINE__, cudaGetErrorString( err) );
}
printf("Kernel Completed Successfully. Woot.\n\n");
char p[bufferSize];
printf("d_x = %lx\n", (unsigned long)d_x );
printf("h_x = %lx\n", (unsigned long)h_x );
cudaMemcpy( h_x, d_x, nObjects*sizeof(char*), cudaMemcpyDeviceToHost);
printf("d_x = %lx\n", (unsigned long)d_x );
printf("h_x = %lx\n", (unsigned long)h_x );
for ( int i=0; i < nObjects; i++ )
{
cudaMemcpy( &p, h_x[i], bufferSize*sizeof(char), cudaMemcpyDeviceToHost);
printf("%d p[] = %s\n",i,p);
}
/* Force Thread Synchronization */
err = cudaThreadSynchronize();
/* Check for and display Error */
if ( cudaSuccess != err )
{
fprintf( stderr, "Cuda error in file '%s' in line %i : %s.\n",
__FILE__, __LINE__, cudaGetErrorString( err) );
}
getchar();
return 0;
}
As #Jon notes, you can't pass x (as you had declared) it to the GPU, because it's an address which lives on the CPU. In the code above, I create an array of char*'s and pass them to a char** which I also allocated on the GPU. Hope this helps!
The main problem with your code is that you're not allocating any device memory for the setValues call. You can't pass it a pointer to host memory (char *x[6]) and expect that to work; the CUDA kernels have to operate on CUDA memory. You create that memory, then operate on it, then copy it back:
#include <stdio.h>
#include <string.h>
#include <cuda.h>
#include <cuda_runtime.h>
__global__ void setValues(char *arr){
arr[blockIdx.y * gridDim.x + blockIdx.x] = '4';
}
int main() {
const int NCHARS=6;
char *xd;
cudaMalloc(&xd, NCHARS);
dim3 grid(3,2);
setValues<<<grid,1>>>(xd);
char *p;
p = (char*) malloc(20*sizeof(char));
strcpy(p,"");
cudaMemcpy(p, xd, NCHARS, cudaMemcpyDeviceToHost);
p[NCHARS]='\0';
printf("<%s>\n", p);
getchar();
cudaFree(xd);
return 0;
}
There are several problems I'm seeing here. Here are some of the most obvious ones:
First, my guess is that the character string constant "4" is stored in host (CPU) memory, so you would have to copy it explicitly to device (global) memory. Once the string "4" is in device memory, then you can store a pointer to "4" in a device memory value, such as an element of array arr.
Second, the array x you pass to the setValues kernel is also in host memory. Remember that you need to use cudaMalloc to allocate a (global) device memory region, which an on-device kernel can then point to.