CUDA __device__ Unresolved extern function [duplicate] - c++

This question already has an answer here:
External calls are not supported - CUDA
(1 answer)
Closed 7 years ago.
I am trying to understand how to split CUDA __device__ code across separate header and source files.
I have three files.
File 1: int2.cuh
// int2.cuh -- declarations for the kernel, device helper and host wrapper
// that are defined in int2.cu.
#ifndef INT2_H_
#define INT2_H_
#include "cuda.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// Kernel defined in int2.cu; every thread prints k2(threadIdx.x).
__global__ void kernel();
// Device helper defined in int2.cu; returns the square of its argument.
// Only a declaration is visible to other translation units, so calling it
// from a kernel in another .cu file needs relocatable device code
// (nvcc -dc / --relocatable-device-code=true).
__device__ int k2(int k);
// Host wrapper defined in int2.cu; launches kernel() on one block of `dim`
// threads and then resets the device.
int launchKernel(int dim);
#endif /* INT2_H_ */
File 2: int2.cu
#include "int2.cuh"
#include "cstdio"
// Kernel: every thread prints k2() of its own threadIdx.x, one value per line.
__global__ void kernel() {
    printf("%d\n", k2(threadIdx.x));
}
// Device helper: returns the square of i.
__device__ int k2(int i) {
    const int squared = i * i;
    return squared;
}
// Launches kernel() on a single block of `dim` threads, waits for it to
// finish, then resets the device.
//
// Returns 0 on success, otherwise the non-zero cudaError_t value of the first
// step that failed (launch, execution, or device reset).
int launchKernel(int dim) {
    kernel<<<1, dim>>>();

    // A launch has no return value: an invalid configuration (for example a
    // non-positive or too large `dim`) is only visible through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        // Errors raised while the kernel runs surface at the next synchronisation.
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess) {
        fprintf(stderr, "launchKernel(%d) failed: %s\n", dim,
                cudaGetErrorString(status));
    }

    // Reset even after a failure so the device is left in a clean state.
    const cudaError_t resetStatus = cudaDeviceReset();
    if (status == cudaSuccess) {
        status = resetStatus;
    }
    return static_cast<int>(status);
}
File 3: CUDASample.cu
#include <stdio.h>
#include <stdlib.h>
#include "int2.cuh"
#include "iostream"
using namespace std;
static const int WORK_SIZE = 256;
// Kernel: every thread prints the square of its own threadIdx.x.
__global__ void sampleCuda() {
    const int idx = threadIdx.x;
    // printf("%d\n", k2(idx)); // Can not call k2
    const int squared = idx * idx;
    printf("%d\n", squared);
}
// Runs the three demos in order: kernel() through the launchKernel() wrapper,
// kernel() launched directly, and sampleCuda() from this file.
// Returns 0 when every step succeeded and 1 as soon as one of them fails.
int main(void) {
    // launchKernel() reports its outcome through the return value; use it
    // instead of discarding it.
    int var = launchKernel(16);
    if (var != 0) {
        fprintf(stderr, "launchKernel(16) returned %d\n", var);
        return 1;
    }

    kernel<<<1, 16>>>();
    // Launch-configuration errors show up in cudaGetLastError(); errors raised
    // during execution show up at the synchronisation that follows.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess) {
        fprintf(stderr, "kernel failed: %s\n", cudaGetErrorString(status));
        return 1;
    }
    cudaDeviceReset();

    sampleCuda<<<1, 16>>>();
    status = cudaGetLastError();
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess) {
        fprintf(stderr, "sampleCuda failed: %s\n", cudaGetErrorString(status));
        return 1;
    }
    cudaDeviceReset();
    return 0;
}
The code works fine. I can call the sampleCuda() kernel (in same file), call the C function launchKernel() (in other file), and call kernel() directly (in other file).
However, I get the following error when calling the __device__ function from the sampleCuda() kernel. The same function is callable in kernel().
10:58:11 **** Incremental Build of configuration Debug for project CUDASample ****
make all
Building file: ../src/CUDASample.cu
Invoking: NVCC Compiler
/Developer/NVIDIA/CUDA-6.5/bin/nvcc -G -g -O0 -gencode arch=compute_20,code=sm_20 -odir "src" -M -o "src/CUDASample.d" "../src/CUDASample.cu"
/Developer/NVIDIA/CUDA-6.5/bin/nvcc -G -g -O0 --compile --relocatable-device-code=false -gencode arch=compute_20,code=compute_20 -gencode arch=compute_20,code=sm_20 -x cu -o "src/CUDASample.o" "../src/CUDASample.cu"
../src/CUDASample.cu(18): warning: variable "var" was set but never used
../src/CUDASample.cu(8): warning: variable "WORK_SIZE" was declared but never referenced
../src/CUDASample.cu(18): warning: variable "var" was set but never used
../src/CUDASample.cu(8): warning: variable "WORK_SIZE" was declared but never referenced
ptxas fatal : Unresolved extern function '_Z2k2i'
make: *** [src/CUDASample.o] Error 255
10:58:14 Build Finished (took 2s.388ms)
How do I call the __device__ function from the sampleCuda() kernel ?

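The issue is that you defined a __device__ function in a separate compilation unit from the __global__ kernel that calls it (your build log shows --relocatable-device-code=false, so device code cannot be linked across files). You need to either explicitly enable relocatable device code mode by adding the -dc flag, or move the definition into the same compilation unit.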
From nvcc documentation:
--device-c|-dc Compile each .c/.cc/.cpp/.cxx/.cu input file into an object file that contains relocatable device code. It is equivalent to
--relocatable-device-code=true --compile.
See Separate Compilation and Linking of CUDA C++ Device Code for more information.

Related

How do I compile a CPP file with a C file?

I am trying to implement my pthread_create function. After searching online I found few examples but I could not compile them and run the code.
I have these 2 files, first one is pthread.c
#define _GNU_SOURCE
#include <dlfcn.h>
#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <signal.h>
/* Pointer to the real pthread_create; NULL until
 * load_original_pthread_create() has resolved it. */
int (*original_pthread_create)(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg) = NULL;

/*
 * Looks up "pthread_create" and stores its address in original_pthread_create.
 *
 * The symbol is searched in libpthread-2.15.so first. If that library cannot
 * be opened, the reason is printed and the lookup falls back to the global
 * symbol scope (RTLD_DEFAULT, available because _GNU_SOURCE is defined).
 * Any dlsym error is printed; original_pthread_create is then left as
 * whatever dlsym returned.
 */
void load_original_pthread_create() {
    void *handle = dlopen("libpthread-2.15.so", RTLD_LAZY);
    if (handle == NULL) {
        /* dlopen signals failure through its return value; dlerror() only
         * supplies the description. */
        char *open_err = dlerror();
        printf("%s\n", open_err ? open_err : "dlopen failed");
        /* Do not hand a failed handle to dlsym; search the default scope. */
        handle = RTLD_DEFAULT;
    }

    /* Clear any pending error so the check below reflects dlsym only. */
    dlerror();
    original_pthread_create = dlsym(handle, "pthread_create");
    char *sym_err = dlerror();
    if (sym_err) {
        printf("%s\n", sym_err);
    }
}
/*
 * Logging wrapper around the real pthread_create.
 * Resolves the real function on first use, prints a notice, then forwards all
 * four arguments unchanged and returns the forwarded call's result.
 */
int my_pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg) {
    /* Lazy one-time lookup of the real implementation. */
    if (!original_pthread_create)
        load_original_pthread_create();

    printf("I am creating thread from my pthread_create\n");

    int rc = original_pthread_create(thread, attr, start_routine, arg);
    return rc;
}
I compiled this using the command below and got a shared object named libmypthread.so
gcc pthread.c -o libmypthread.so -shared -fpic -ldl
Now the second file, main.cpp
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <stdio.h>
#include <ostream>
#include <iostream>
#include <thread>
#include <chrono>
#include <pthread.h>
#include <signal.h>
#include <functional>
#include <mutex>
#include <condition_variable>
#include <execinfo.h>
#include <sstream>
#include <dlfcn.h>
#include <unistd.h>
// Thread entry point: prints the calling thread's pthread id, then sleeps in
// 10-second steps until `done` becomes true. The argument is unused.
// NOTE(review): `done` is not declared in this snippet -- presumably a
// file-scope flag set elsewhere; confirm.
// Returns NULL.
void *dummyThread(void *t)
{
    (void)t;
    // just spin/sleep until done
    // pthread_t is an opaque type; cast it so it matches %lu.
    printf("thread Id is pthread id is %lu\n", (unsigned long)pthread_self());
    while(!done)
    {
        sleep(10);
    }
    // Flowing off the end of a non-void function is undefined behaviour in C++.
    return NULL;
}
// Starts two dummyThread workers through my_pthread_create(), waits for both
// to finish, then prints a farewell message.
// Returns 0 on success, 1 if either thread could not be created.
int main(int argc, char*argv[])
{
    (void)argc;
    (void)argv;
    printf("Hello World!\n");
    pthread_t ptid1, ptid2;

    int ret1 = my_pthread_create(&ptid1, NULL, dummyThread, NULL);
    if (ret1 != 0) {
        fprintf(stderr, "creating thread 1 failed with error %d\n", ret1);
        return 1;
    }
    int ret2 = my_pthread_create(&ptid2, NULL, dummyThread, NULL);
    if (ret2 != 0) {
        fprintf(stderr, "creating thread 2 failed with error %d\n", ret2);
        return 1;
    }

    // Join instead of pthread_exit(): pthread_exit never returns, so the
    // message below could not be reached.
    pthread_join(ptid1, NULL);
    pthread_join(ptid2, NULL);
    printf ("Goodbye Cruel World!\n");
    return 0;
}
To compile this above code, I use a Makefile
# Toolchain and flags used to build the `tool` executable.
CC=gcc
CXX=g++
RM=rm -f
CPPFLAGS=-g
CXXFLAGS=-std=c++17
LDFLAGS=-g -rdynamic
LDLIBS=-lpthread -ldl

# Only the C++ source is listed; each .cpp maps to a .o of the same name.
SRCS=main.cpp
OBJS=$(SRCS:.cpp=.o)

# Default goal.
all: tool

# Link every object file into the executable with the C++ driver.
tool: $(OBJS)
	$(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS)
when I run make, I get this following error:
undefined reference to `my_pthread_create(unsigned long*, pthread_attr_t const*, void* (*)(void*), void*)'
/usr/bin/ld: /home/hgovindh/perf/main.cpp:: undefined reference to `my_pthread_create(unsigned long*, pthread_attr_t const*, void* (*)(void*), void*)'
collect2: error: ld returned 1 exit status
make: *** [Makefile:16: tool] Error 1
Now, how do I run this main.cpp so that it calls the my_pthread_create defined in the pthread.c file?
Are you familiar with C++'s name mangling? In C, if you have a function named foo -- regardless of the arguments it receives -- the symbol in the .o file is called foo. In C++, the name is mangled to include type information. This is how you can engage in method name overloading based on different arguments.
In your include file, you need to use
extern "C" {
....
}
This tells the C++ compiler that the symbols inside those braces are C symbols, and don't name-mangle them.
It will actually look something like this:
#ifdef __cplusplus
extern "C" {
#endif
With similar ifdef around the closing brace.
You should wrap your entire C .h file in this type of guard so it can be included in your C++ files. The other choice is to guard it when you include it, but it's better if you do the guard in the include file itself.
So you'll ultimately have something like this:
#ifdef __cplusplus
extern "C" {
#endif
void load_original_pthread_create();
int my_pthread_create(pthread_t *, const pthread_attr_t *, void *(*) (void *), void *);
#ifdef __cplusplus
}
#endif
You should be able to create different compilation rules depending on the file extension. Note that my preferred approach of using CMake is presented at the end of the answer.
# Toolchain and flags for a mixed C / C++ build of `tool`.
CC=gcc
CXX=g++
RM=rm -f
CPPFLAGS=-g
CXXFLAGS=-std=c++17
CFLAGS=-g
LDFLAGS=-g -rdynamic
LDLIBS=-lpthread -ldl

SRCS=main.cpp pthread.c
# Keep the source extension in the object name (main.cpp.o, pthread.c.o) so
# the pattern rules below can pick the right compiler.
OBJS=$(addsuffix .o,$(SRCS))

.PHONY: all
all: tool

# Link with the C++ driver so the C++ runtime is pulled in.
tool: $(OBJS)
	$(CXX) $(LDFLAGS) -o tool $(OBJS) $(LDLIBS)

# $@ is the target (the .o file) and $< is the first prerequisite (the source).
%.cpp.o: %.cpp
	$(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $<

%.c.o: %.c
	$(CC) -c -o $@ $(CFLAGS) $<
This appends .o to the full file name, extension included (main.cpp.o, pthread.c.o), so that the pattern rules for the object files can tell .cpp files apart from .c files.
Alternative: Let CMake generate the makefiles
CMake can take care of generating the project files for you. It will automatically choose some reasonable defaults and allows you to change the build system you're using without much effort e.g. to ninja instead of unix makefiles.
CMakeLists.txt
(Place in the directory containing the source files)
cmake_minimum_required(VERSION 3.16)
# compile features for c++17 only documented in v 3.16, but may be available earlier
project(Tool)

# Let CMake locate the platform's thread library instead of hard-coding the
# name `pthread`; prefer the -pthread flag so compile and link settings agree.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

add_executable(tool
    main.cpp
    pthread.c
)

# make sure at least C++17 is used
target_compile_features(tool PRIVATE cxx_std_17)
set_target_properties(tool PROPERTIES
    CXX_EXTENSIONS Off # no -std=gnu-...
    # CXX_STANDARD 17 # require an exact standard (remove target_compile_feature, if commented in)
)

# Threads::Threads carries the thread flags; CMAKE_DL_LIBS supplies the
# library providing dlopen/dlsym where one is needed.
target_link_libraries(tool PRIVATE Threads::Threads ${CMAKE_DL_LIBS})
Building the project on linux should use gcc and g++ as default, unless you modified the environment variables to point to other compilers, in addition to using the generator "Unix Makefiles" by default.
Choose the name of a directory where CMake is free to modify/remove/overwrite any files without messing with your project files. I use build_release and build_debug below. From the directory containing the CMakeLists.txt file, execute the following commands to create makefile projects for the release and debug version respectively. This step is necessary only once.
cmake -D CMAKE_BUILD_TYPE=Debug -S . -B build_debug
cmake -D CMAKE_BUILD_TYPE=Release -S . -B build_release
Now the equivalent of running make all would be
cmake --build build_debug
cmake --build build_release
(alternatively you could change the working directory to build_debug or build_release and run make all, but the cmake --build ... version works with any Generator type even without changing the working directory.)

use of undeclared identifier 'temp'

The C++ code below gives me the error: use of undeclared identifier 'temp'
/home/test/include/memory.h
// memory.h -- device-side counter and accessor, compiled only when
// __ENABLE_MEMORY is non-zero.
#ifndef MEMORY_H
#define MEMORY_H
// If __ENABLE_MEMORY is undefined when this header is read, the condition
// evaluates to 0 and nothing below (including `temp`) is declared.
#if __ENABLE_MEMORY
// Counter declared with the __device__ qualifier; starts at 50.
__device__ int temp = 50;
// Adds 100 to `temp` on every call. The rest of the body is elided in this
// snippet, so the returned pointer is not shown.
extern "C" inline __device__ void* memory(){
...
temp = temp + 100;
...
}
#endif
#endif
/home/test/include/internal_memory.h
// internal_memory.h -- turns the memory feature on by default, declares
// memory(), and then pulls in its definition from memory.h.
#ifndef INTERNAL_MEMORY_H
#define INTERNAL_MEMORY_H
// Default the feature switch to 1 unless the includer already defined it.
#ifndef __ENABLE_MEMORY
#define __ENABLE_MEMORY 1
#endif
#if __ENABLE_MEMORY
// Declaration only; the definition is expected from the include below.
extern "C" __device__ void* memory();
// Thin forwarding wrapper around memory().
static inline __device__ void* call_memory(){ return memory();}
#endif
// NOTE(review): angle-bracket include of a name that also exists as a system
// header; which memory.h is picked up depends on the -I search order -- confirm.
#include <memory.h>
#endif
/home/test/main.cpp
#include "internal_memory.h"
..
.
.
// Prints the current value of `temp` to standard output, followed by a
// newline and a flush.
void show(){
    std::cout << "temp is: ";
    std::cout << temp << std::endl;
}
.
.
.
I am compiling the code with clang++ version 11.0.0.
clang --version looks as follows:
clang version 11.0.0
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /opt/rocm/llvm/bin
I am compiling the code as follows:
/opt/rocm/llvm/bin/clang++ -DDEBUG -D__x86_64__ -I/home/test/include -I/home/test -g -fPIC -std=c++14 -o main.o -c main.cpp
What am I missing here?
Function memory has the __device__ qualifier (I suppose you are compiling a CUDA program).
memory refers to temp, which is a global variable in host memory. You probably want __device__ int temp = 50;
To query temp, you would need to copy its value back to the host. Along the lines of
int host;
cudaMemcpy(&host, &temp, sizeof(int), cudaMemcpyDeviceToHost);

Ebpf:undefined symbol for helpers

I run an up to date debian testing (with kernel 4.19).
Helpers are not found on my system (but they exist in the header, Qt jumps to them)
#include "bpf/bpf.h"
// Calls bpf_create_map() once with fixed arguments; the result is stored but
// not inspected. Always exits with status 0.
int main (){
    const int rc = bpf_create_map(BPF_MAP_TYPE_ARRAY, 1, 1, 1, 0);
    return 0;
}
Compilation results in
undefined reference to `bpf_create_map(bpf_map_type, int, int, int, unsigned int)'
compiled with
g++ -c -pipe -g -std=gnu++1z -Wall -W -fPIC -DQT_QML_DEBUG -I. -I../../Qt/5.13.0/gcc_64/mkspecs/linux-g++ -o main.o main.cpp
g++ -lbpf -o server main.o
Same result with
g++ main.cpp -lbpf -o out
I have the libbpf-dev installed as well and i have the associated libraries (a and so).
What is wrong?
Update
even the following code won't work
#include <linux/bpf.h>
// Fills a zero-initialised bpf_attr with the fields of a one-entry array map
// and passes it to bpf() with the BPF_MAP_CREATE command.
int main (){
    //int r = bpf_create_map(BPF_MAP_TYPE_ARRAY,1,1,1,0);
    bpf_attr attr{};
    // The four fields are independent; the order of assignment is irrelevant.
    attr.max_entries = 1;
    attr.value_size = 1;
    attr.key_size = 1;
    attr.map_type = BPF_MAP_TYPE_ARRAY;
    bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
    return 0;
}
results in
error: 'bpf' was not declared in this scope
Update2:
BTW, key size is mandated to be 4 and not 1; but it is a point aside, that was unrelated to my problem here.
Namespace issue due to compiling in C++, you probably want:
extern "C" {
#include "bpf/bpf.h"
}
int main()...
Regarding your second error (error: 'bpf' was not declared in this scope), this is not directly related to libbpf, this is because there is no function simply called bpf() to actually perform the syscall. Instead you have to use the syscall number. For example, libbpf defines the following:
/*
 * Issues the bpf system call by number, since no bpf() wrapper function is
 * available to call directly.
 * Forwards cmd, attr and size unchanged and returns the syscall result
 * narrowed to int.
 */
static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
                          unsigned int size)
{
    long rc = syscall(__NR_bpf, cmd, attr, size);
    return (int)rc;
}
... and uses sys_bpf() after that, the same way you try to call bpf() in your sample.
For the record, “BPF helpers” often designates BPF functions that you call from within a BPF program, which is not the case here. Hence some confusion in the comments, I believe.

CMake configuration file for calling C++ code from a C++ function called by Fortran code

I have been using a FORTRAN code, main.f90, to send an array to func.cpp which would call a C++ code, addition.cpp & addition.h. The code was working properly on a CentOS 4 platform, but when I moved it to a CentOS 6 one, it is giving me errors. I have tried using the same version of gcc (4.3.0) on both machines or a newer version 4.4.7 in CentOS 6, but the issue is not resolved. I am attaching the over-simplified version of the code as
main.f90:
! Passes a four-element double-precision array to the C-bound routine "func"
! and prints the integer it returns.
program main
  use iso_c_binding
  implicit none

  ! Explicit interface for the external routine exported under the C name "func".
  interface
    function func (a) bind (C, name="func")
      import
      integer(c_int):: func
      real(c_double), dimension(1:4), intent(in):: a
    end function func
  end interface

  ! The literals carry the c_double kind. Without the suffix they are default
  ! (single precision) reals and are rounded before being widened to double.
  real(c_double), dimension(1:4):: a = [ 2.3_c_double, 3.4_c_double, &
                                         4.5_c_double, 5.6_c_double ]
  integer(c_int):: result

  result = func(a)
  write (*,*) result
end program main
func.cpp:
#include <iostream>
#include "addition.h"
using namespace std;
#ifdef __cplusplus
extern"C" {
#endif
// C-linkage entry point called from the Fortran main program.
// Prints the first four elements of `a`, one per line, then prints the result
// of addition(5, 3). Always returns 0.
//
// The former `void *__gxx_personality_v0;` definition is removed on purpose:
// that name is reserved for the C++ runtime's exception personality routine,
// and defining it as a data pointer shadows the real symbol. Link the C++
// runtime (libstdc++) instead.
int func(double a[]) {
    // The caller declares the array with exactly four elements.
    for (int i = 0; i < 4; i++) {
        cout << a[i] << endl;
    }
    const int z = addition (5,3);
    cout << z << endl;
    return 0;
}
#ifdef __cplusplus
}
#endif
addition.cpp:
#include <iostream>
#include "addition.h"
using namespace std;
// Returns the sum of a and b.
int addition (int a, int b)
{
    return a + b;
}
addition.h:
#ifndef ADDITION_H
#define ADDITION_H
int addition (int a, int b);
#endif /* ADDITION_H */
CMakeLists.txt:
cmake_minimum_required(VERSION 2.6)

# Setting the compilers. This has to happen before PROJECT(), because that is
# where CMake probes and caches the compiler for every enabled language.
set (CMAKE_Fortran_COMPILER /usr/bin/gfortran)
set (CMAKE_CXX_COMPILER /usr/bin/g++)

# Enable every language used by the sources in one place.
PROJECT(test C CXX Fortran)

# Making the executable
ADD_EXECUTABLE(test.exe main.f90 func.cpp addition.cpp addition.h)

# The main program is Fortran, so link with the Fortran driver: it provides the
# `main` entry point and the Fortran runtime. Library flags such as -lgfortran
# do not belong in CMAKE_CXX_FLAGS, and -c is already added by CMake.
SET_TARGET_PROPERTIES(test.exe PROPERTIES LINKER_LANGUAGE Fortran)

# The C++ sources use iostream, so the C++ runtime must be linked explicitly.
TARGET_LINK_LIBRARIES(test.exe stdc++)
The error I am getting now is:
/usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../lib64/crt1.o: In function `_start':
(.text+0x20): undefined reference to `main'
collect2: ld returned 1 exit status
make[2]: *** [test.exe] Error 1
make[1]: *** [CMakeFiles/test.exe.dir/all] Error 2
make: *** [all] Error 2
I appreciate any help on solving the issue.
Why do you link with g++ when your main program is in Fortran? Do it the other way, link with gfortran and add -lstdc++.
Just add line: SET_TARGET_PROPERTIES(test.exe PROPERTIES LINKER_LANGUAGE Fortran)
Or use more recent GCC. All versions of GCC which are still supported work even with your original settings.

undefined reference to function in gcc

I am trying to use a C static library but getting the following error while compiling/linking in gcc. The main file test.c needs to call a function from the static library libtest.a
Header file : testcplusplus.h
void print_cplusplus(int b);
testcplusplus.c :
#include <stdio.h>
#include "testcplusplus.h"
/* Writes "Value of b is <b> " followed by a newline to standard output. */
void print_cplusplus(int b) {
    fprintf(stdout, "Value of b is %d \n", b);
}
Main C file : test.c
#include <stdio.h>
#include "testcplusplus.h"
/* Entry point: hands the value 2 to print_cplusplus(). */
int main() {
    const int a = 2;
    print_cplusplus(a);
}
Commands Used :
g++ -c -o testcplusplus.o testcplusplus.c
ar rvs libtest.a testcplusplus.o
gcc -o test test.c -L. -ltest **// Error comes here**
Error:
In function `main':
test.c:(.text+0x15): undefined reference to `print_cplusplus'
collect2: ld returned 1 exit status
You only specify function parameter types when you declare/define the function, not when you call it. The function call should look like
print_cplusplus(a);
You also need to include testcplusplus.h from test.c so that the declaration is available when you call it. The return type of main needs to be int; and print_cplusplus should either have void return type, or should return a value.
Finally, you'll need to declare the function extern "C" for it to be callable from a C program - but only when compiling C++.
// testcplusplus.h
#ifdef __cplusplus
extern "C" {
#endif
void print_cplusplus(int b);
#ifdef __cplusplus
}
#endif
You don't rewrite a variables type when you use it, it's only used in declarations:
print_cplusplus(a);