use of undeclared identifier 'temp' - c++

The below c++ code gives me error use of undeclared identifier 'temp'
/home/test/include/memory.h
#ifndef MEMORY_H
#define MEMORY_H
#if __ENABLE_MEMORY
__device__ int temp = 50;
extern "C" inline __device__ void* memory(){
...
temp = temp + 100;
...
}
#endif
#endif
/home/test/include/internal_memory.h
#ifndef INTERNAL_MEMORY_H
#define INTERNAL_MEMORY_H
#ifndef __ENABLE_MEMORY
#define __ENABLE_MEMORY 1
#endif
#if __ENABLE_MEMORY
extern "C" __device__ void* memory();
static inline __device__ void* call_memory(){ return memory();}
#endif
#include <memory.h>
#endif
/home/test/main.cpp
#include "internal_memory.h"
..
.
.
void show(){
std::cout << "temp is: " << temp << std::endl;
}
.
.
.
I m compiling the code with clang++ version 11.0.0.
clang --version looks as follows:
clang version 11.0.0
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /opt/rocm/llvm/bin
Im compiling the code as below.
/opt/rocm/llvm/bin/clang++ -DDEBUG -D__x86_64__ -I/home/test/include -I/home/test -g -fPIC -std=c++14 -o main.o -c main.cpp
what am i missing here.

Function memory has the __device__ qualifier (I suppose you are compiling a CUDA program).
memory refers to temp, which is a global variable in host memory. You probably want __device__ int temp = 50;
To query temp, you would need to copy its value back to the host. Along the lines of
int host;
cudaMemcpy(&host, &temp, sizeof(int), cudaMemcpyDeviceToHost);

Related

How to compile C++ with CUB library?

I am using the CUB device function just like the example here (https://forums.developer.nvidia.com/t/cub-library/37675/2). I was able to compile the .cu source file in the above example using nvcc.
However, I wonder if it is possible to call CUB device function in .cpp source file and compile the .cpp source file (using nvcc or g++)? I know its possible for thrust, since the example here works for me.
Currently I simply move the main function into a new main.cpp file and include the cub header file in main.cpp, but I failed to compile it using nvcc or g++ because of the same errors, part of the error message:
/home/xx/cub/cub/block/specializations/../../block/../util_type.cuh:261:5: error: ‘__host__’ does not name a type; did you mean ‘__loff_t’?
__host__ __device__ __forceinline__ NullType& operator =(const T&) { return *this; }
^~~~~~~~
__loff_t
/home/xx/cub/cub/block/specializations/../../block/../util_type.cuh:316:19: error: ‘short4’ was not declared in this scope
__CUB_ALIGN_BYTES(short4, 8)
^
/home/xx/cub/cub/block/specializations/../../block/../util_type.cuh:314:52: error: ISO C++ forbids declaration of ‘__align__’ with no type [-fpermissive]
{ enum { ALIGN_BYTES = b }; typedef __align__(b) t Type; };
^
/home/xx/cub/cub/block/specializations/../../block/../util_type.cuh:545:9: error: ‘__host__’ does not name a type; did you mean ‘__loff_t’?
__host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \
^
/home/xx/cub/cub/block/specializations/../../block/../util_arch.cuh:64:38: error: ‘__host__’ does not name a type; did you mean ‘CUhostFn’?
#define CUB_RUNTIME_FUNCTION __host__ __device__
^
/home/xx/cub/cub/device/../iterator/arg_index_input_iterator.cuh:144:25: error: ‘__forceinline__’ does not name a type; did you mean ‘__thrust_forceinline__’?
__host__ __device__ __forceinline__ ArgIndexInputIterator(
^~~~~~~~~~~~~~~
__thrust_forceinline__
/home/xx/cub/cub/device/device_reduce.cuh:148:12: error: ‘cudaError_t’ does not name a type; did you mean ‘cudaError_enum’?
static cudaError_t Reduce(
^~~~~~~~~~~
cudaError_enum
Here are my source files:
device.h
#pragma once
#include <cub/cub.cuh>
void scan_on_device();
device.cu
#include "device.h"
void scan_on_device()
{
// Declare, allocate, and initialize device pointers for input and output
int num_items = 7;
int *d_in;
int h_in[] = {8, 6, 7, 5, 3, 0, 9};
int sz = sizeof(h_in)/sizeof(h_in[0]);
int *d_out; // e.g., [ , , , , , , ]
cudaMalloc(&d_in, sz*sizeof(h_in[0]));
cudaMalloc(&d_out, sz*sizeof(h_in[0]));
cudaMemcpy(d_in, h_in, sz*sizeof(h_in[0]), cudaMemcpyHostToDevice);
printf("\nInput:\n");
for (int i = 0; i < sz; i++) printf("%d ", h_in[I]);
// Determine temporary device storage requirements
void *d_temp_storage = NULL;
size_t temp_storage_bytes = 0;
cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
// Allocate temporary storage
cudaMalloc(&d_temp_storage, temp_storage_bytes);
// Run inclusive prefix sum
cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
// d_out s<-- [8, 14, 21, 26, 29, 29, 38]
cudaMemcpy(h_in, d_out, sz*sizeof(h_in[0]), cudaMemcpyDeviceToHost);
printf("\nOutput:\n");
for (int i = 0; i < sz; i++) printf("%d ", h_in[i]);
printf("\n");
}
host.cpp
#include "device.h"
#include <cub/cub.cuh>
int main(void)
{
scan_on_device();
return 0;
}
I tried to compile them in three steps:
nvcc -O2 -c device.cu -I/home/xx/cub
g++ -O2 -c host.cpp -I/usr/local/cuda/include/ -I/home/xx/cub
g++ -o tester device.o host.o -L/usr/local/cuda/lib64 -lcudart
The first step went well, but the second step gives the above errors. Any ideas are appreciated. Maybe I mess up some links (to cuda or cub)?
The cub header library (e.g. cub.cuh) contains CUDA C++ code. Such code cannot be compiled by an ordinary host compiler like g++. You will get compile errors if you try to do so.
However your project doesn't require cub.cuh to be in your device.h header file, nor does it require cub.cuh to be compiled by g++. The only thing needed in your device.h header file is the function prototype for scan_on_device().
Therefore if you include the cub header file in the function implementation file device.cu, and remove it elsewhere in your project, your code will compile.

Error: Invalid use of void

I'm compiling C++ source (main.cpp) with a C header (hps_linux.h). The code in hps_linux.h is:
#ifndef HPS_LINUX_H_
#define HPS_LINUX_H_
#include <stdbool.h>
#include <stdint.h>
#include "socal/hps.h"
int fd_dev_mem = 0;
void *h2f_lw_axi_master = NULL;
size_t h2f_lw_axi_master_span = ALT_LWFPGASLVS_UB_ADDR -ALT_LWFPGASLVS_LB_ADDR + 1;
size_t h2f_lw_axi_master_ofst = ALT_LWFPGASLVS_OFST;
#endif
hps_linux.h includes hps.h, that has the next defines:
#define ALT_LWFPGASLVS_OFST 0xff200000
#define ALT_LWFPGASLVS_ADDR ALT_CAST(void *, (ALT_CAST(char *, ALT_HPS_ADDR) + ALT_LWFPGASLVS_OFST))
#define ALT_LWFPGASLVS_LB_ADDR ALT_LWFPGASLVS_ADDR
#define ALT_LWFPGASLVS_UB_ADDR ALT_CAST(void *, ((ALT_CAST(char *, ALT_LWFPGASLVS_ADDR) + 0x200000) - 1))
My main.cpp includes hps_linux.h . I'm compiling like this:
gcc -Wall -std=gnu99 hps_linux.c -o hps_linux.o
g++ -Wall -std=c++0x main.cpp -o main
And it throws the next error:
hps_linux.h: error: invalid use of 'void'
In line:
size_t h2f_lw_axi_master_span = ALT_LWFPGASLVS_UB_ADDR -ALT_LWFPGASLVS_LB_ADDR + 1;
When I compile it with a main written in C (main.c), it works.
Looking at the source code here, we have:
#ifdef __ASSEMBLY__
# define ALT_CAST(type, ptr) ptr // <-- not (ptr)???
#else
# define ALT_CAST(type, ptr) ((type) (ptr))
#endif /* __ASSEMBLY__ */
Now, we have:
size_t h2f_lw_axi_master_span = ALT_LWFPGASLVS_UB_ADDR - ALT_LWFPGASLVS_LB_ADDR + 1;
Partially expanding the macros, we have:
size_t h2f_lw_axi_master_span = ALT_CAST(void *, ALT_CAST(char *, ...)) - ALT_CAST(void *, ALT_CAST(char *, ...)) + 1;
Now, if __ASSEMBLY__ is set, this line is:
size_t h2f_lw_axi_master_span = ... - ... + 1;
Both ... expressions are integer, so this is OK. However, if __ASSEMBLY__ is not set, this line is:
size_t h2f_lw_axi_master_span = (void *) (...) - (void *) (...) + 1;
Subtracting two void pointers is not defined, so the compiler gives up.
Therefore, you need to ensure __ASSEMBLY__ is defined; one way would be:
g++ -Wall -D__ASSEMBLY__ -std=c++0x main.cpp -o main
However, this might cause problems as you should be relying on the earlier header files to set it correctly.
Update: A quick search of the git archive does not show __ASSEMBLY__ being set, and given the name it suggests it should be a compiler built-in...

How to mix C++ and C correctly

I am having some problems with this: I need to write a C wrapper for a C++ library. Say I have 3 files:
wrapper.h
typedef struct Foo Foo;
Foo* create_foo();
wrapper.cpp
extern "C" {
#include "wrapper.h"
}
#include "foo.h"
Foo* create_foo() {
return new Foo;
}
foo.h
class Foo {
public:
Foo();
};
This compiles fine:
clang++ -std=c++14 wrapper.cpp foo.h wrapper.h -shared -fPIC
clang++ -shared -o libbindings.so a.out
but when compiling the program that uses the C wrapper (it is compiler and linked by the programming language that uses the wrapper - Crystal), I get an undefined reference to create_foo() and a linker error collect2: error: ld returned 1 exit status. How can I debug this (and what am I doing wrong)?
Here is a working example:
wrapper.h (C & C++ aware)
#ifndef WRAPPER_H_
#define WRAPPER_H_
#ifdef __cplusplus
extern "C" {
#endif
typedef struct CPPClass CPPClass;
CPPClass* CPPClass_new();
void CPPClass_do_something(CPPClass* cppclass);
int CPPClass_get_state(CPPClass* cppclass);
void CPPClass_delete(CPPClass* cppclass);
#ifdef __cplusplus
}
#endif
#endif /* WRAPPER_H_ */
wrapper.cpp (C++ only)
#include "wrapper.h"
class CPPClass
{
int state;
public:
CPPClass(): state(0) {}
void do_something() { ++state; }
int get_state() const { return state; }
};
extern "C" CPPClass* CPPClass_new()
{
return new CPPClass;
}
extern "C" void CPPClass_do_something(CPPClass* cppclass)
{
cppclass->do_something();
}
extern "C" int CPPClass_get_state(CPPClass* cppclass)
{
return cppclass->get_state();
}
extern "C" void CPPClass_delete(CPPClass* cppclass)
{
delete cppclass;
}
use-wrapper.c (C only)
#include <stdio.h>
#include "wrapper.h"
int main(void)
{
CPPClass* cppclass = CPPClass_new();
if(!cppclass)
{
printf("ERROR: failed to create CPPClass:\n");
return 1;
}
printf("state: %d\n", CPPClass_get_state(cppclass));
CPPClass_do_something(cppclass);
printf("state: %d\n", CPPClass_get_state(cppclass));
CPPClass_delete(cppclass);
}
Compile CPP
g++ -std=c++11 -shared -fPIC -o libwrapper.so wrapper.cpp
Compile C
gcc -o use-wrapper use-wrapper.c -L. -lwrapper -lstdc++
Output:
$ ./use-wrapper
state: 0
state: 1
Hope that helps.
You are creating a shared object named a.out, then another shared object named libbindings.so that ostensibly links to a.out but references nothing from it. Now if a set of input files doesn't have any undefined symbols, no libraries are searched or added to the output. So libbindings.so is essentially an empty library. Verify:
% nm a.out | grep create_foo
00000000000006bc T create_foo
% nm libbindings.so | grep create_foo
%
If you have several source files, you should build an object file from each source (use -c compilation flag), (then optionally combine the objects into a static library --- skip this step if you are not releasing static libraries) then build a shared object from previously built objects:
clang++ -c -fPIC foo.cpp
clang++ -c -fPIC bar.cpp
clang++ -shared -o libfoobar.so foo.o bar.o
If you only have one source, or very few source files you can easily compile together, you can build the shared library in one step:
clang++ -std=c++14 wrapper.cpp somethingelse.cpp -shared -fPIC -o libbindings.so
This is not recommended for large projects.

CUDA __device__ Unresolved extern function [duplicate]

This question already has an answer here:
External calls are not supported - CUDA
(1 answer)
Closed 7 years ago.
I am trying to understand how to decouple CUDA __device__ codes in separate header files.
I have three files.
File: 1: int2.cuh
#ifndef INT2_H_
#define INT2_H_
#include "cuda.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
__global__ void kernel();
__device__ int k2(int k);
int launchKernel(int dim);
#endif /* INT2_H_ */
File 2: int2.cu
#include "int2.cuh"
#include "cstdio"
__global__ void kernel() {
int tid = threadIdx.x;
printf("%d\n", k2(tid));
}
__device__ int k2(int i) {
return i * i;
}
int launchKernel(int dim) {
kernel<<<1, dim>>>();
cudaDeviceReset();
return 0;
}
File 3: CUDASample.cu
include <stdio.h>
#include <stdlib.h>
#include "int2.cuh"
#include "iostream"
using namespace std;
static const int WORK_SIZE = 256;
__global__ void sampleCuda() {
int tid = threadIdx.x;
// printf("%d\n", k2(tid)); //Can not call k2
printf("%d\n", tid * tid);
}
int main(void) {
int var;
var = launchKernel(16);
kernel<<<1, 16>>>();
cudaDeviceReset();
sampleCuda<<<1, 16>>>();
cudaDeviceReset();
return 0;
}
The code works fine. I can call the sampleCuda() kernel (in same file), call the C function launchKernel() (in other file), and call kernel() directly (in other file).
However, I get the following error when calling the __device__ function from the sampleCuda() kernel. The same function is callable in kernel().
10:58:11 **** Incremental Build of configuration Debug for project CUDASample ****
make all
Building file: ../src/CUDASample.cu
Invoking: NVCC Compiler
/Developer/NVIDIA/CUDA-6.5/bin/nvcc -G -g -O0 -gencode arch=compute_20,code=sm_20 -odir "src" -M -o "src/CUDASample.d" "../src/CUDASample.cu"
/Developer/NVIDIA/CUDA-6.5/bin/nvcc -G -g -O0 --compile --relocatable-device-code=false -gencode arch=compute_20,code=compute_20 -gencode arch=compute_20,code=sm_20 -x cu -o "src/CUDASample.o" "../src/CUDASample.cu"
../src/CUDASample.cu(18): warning: variable "var" was set but never used
../src/CUDASample.cu(8): warning: variable "WORK_SIZE" was declared but never referenced
../src/CUDASample.cu(18): warning: variable "var" was set but never used
../src/CUDASample.cu(8): warning: variable "WORK_SIZE" was declared but never referenced
ptxas fatal : Unresolved extern function '_Z2k2i'
make: *** [src/CUDASample.o] Error 255
10:58:14 Build Finished (took 2s.388ms)
How do I call the __device__ function from the sampleCuda() kernel ?
The issue is that you defined a __device__ function in separate compilation unit from __global__ that calls it. You need to either explicitely enable relocatable device code mode by adding -dc flag or move your definition to the same unit.
From nvcc documentation:
--device-c|-dc Compile each .c/.cc/.cpp/.cxx/.cu input file into an object file that contains relocatable device code. It is equivalent to
--relocatable-device-code=true --compile.
See Separate Compilation and Linking of CUDA C++ Device Code for more information.

compilation error about exceptions

I met some compilation error but do not know what the problem is. The code seems not use exception, but the error is about it.
//in misc.h:
char *basename(char *name); // line 94
// in misc.cc:
char *basename(char *name) { // line 12
char *result = name;
while(*name) {
if(*name == '/') result = name + 1;
name++;
}
return result;
}
Compilation error
g++ -pipe -W -Wall -fopenmp -ggdb3 -O2 -c -o misc.o ../../src/misc.cc
../../src/misc.cc: In function ‘char* basename(char*)’:
../../src/misc.cc:12: error: declaration of ‘char* basename(char*)’ throws different exceptions
../../src/misc.h:94: error: from previous declaration ‘char* basename(char*) throw ()’
make: *** [misc.o] Error 1
Does someone have some clue? Thanks and regards!
EDIT:
Files included in misc.h are
#include <iostream>
#include <cmath>
#include <fstream>
#include <cfloat>
#include <stdlib.h>
#include <string.h>
EDIT:
in misc.i generated by -E option,
extern "C++" char *basename (char *__filename)
throw () __asm ("basename") __attribute__ ((__nonnull__ (1)));
extern "C++" __const char *basename (__const char *__filename)
throw () __asm ("basename") __attribute__ ((__nonnull__ (1)));
# 640 "/usr/include/string.h" 3 4
# 1 "/usr/include/bits/string3.h" 1 3 4
# 23 "/usr/include/bits/string3.h" 3 4
extern void __warn_memset_zero_len (void) __attribute__((__warning__ ("memset used with constant zero length parameter; this could be due to transposed parameters")));
# 48 "/usr/include/bits/string3.h" 3 4
extern __inline __attribute__ ((__always_inline__)) __attribute__ ((__gnu_inline__, __artificial__)) void *
memcpy (void *__restrict __dest, __const void *__restrict __src, size_t __len) throw ()
{
return __builtin___memcpy_chk (__dest, __src, __len, __builtin_object_size (__dest, 0));
}
...
# 641 "/usr/include/string.h" 2 3 4
...
You may be picking up the definition of basename() from libgen.h. On my OpenSUSE system, the version in libgen.h is defined with "throw ()" at the end (via the __THROW macro).
One thing you can try is to tell gcc to only run the preprocessor stage by adding the -E flag and then search for basename to see what is being defined:
g++ -pipe -W -Wall -fopenmp -ggdb3 -O2 -E -o misc.i ../../src/misc.cc
If that is happening, you'll either need to drop the include of libgen.h, match the throw specifier or change the name of your function.
" ../../src/misc.h:94: error: from previous declaration ‘char* basename(char*) throw ()’ "
I'm reading that as having been declared twice, once with throw() and once without.
Compiles for me, same flags.
g++ (Gentoo 4.3.4 p1.0, pie-10.1.5) 4.3.4
Your error says that there is a declaration of ‘char* basename(char*) throw ()’
try opening misc.h and searching for throw in the entire file, to see if you put the throw in yourself and just forgot about it.