cuda shared library linking: undefined reference to cudaRegisterLinkedBinary - c++

Goal:
create a shared library containing my CUDA kernels that has a CUDA-free wrapper/header.
create a test executable for the shared library.
Problem
shared library MYLIB.so seems to compile fine. (no problem).
Error in linking:
./libMYLIB.so: undefined reference to __cudaRegisterLinkedBinary_39_tmpxft_000018cf_00000000_6_MYLIB_cpp1_ii_74c599a1
simplified makefile:
libMYlib.so : MYLIB.o
g++ -shared -Wl,-soname,libMYLIB.so -o libMYLIB.so MYLIB.o -L/the/cuda/lib/dir -lcudart
MYLIB.o : MYLIB.cu MYLIB.h
nvcc -m64 -arch=sm_20 -dc -Xcompiler '-fPIC' MYLIB.cu -o MYLIB.o -L/the/cuda/lib/dir -lcudart
test : test.cpp libMYlib.so
g++ test.cpp -o test -L. -ldl -Wl,-rpath,. -lMYLIB -L/the/cuda/lib/dir -lcudart
indeed
nm libMYLIB.so shows that all CUDA api functions are "undefined symbols":
U __cudaRegisterFunction
U __cudaRegisterLinkedBinary_39_tmpxft_0000598c_00000000_6_CUPA_cpp1_ii_74c599a1
U cudaEventRecord
U cudaFree
U cudaGetDevice
U cudaGetDeviceProperties
U cudaGetErrorString
U cudaLaunch
U cudaMalloc
U cudaMemcpy
So CUDA somehow did not get linked to the shared library MYLIB.so
What am I missing?
CUDA did not even get linked to the object file somehow:
nm MYLIB.o
U __cudaRegisterFunction
U __cudaRegisterLinkedBinary_39_tmpxft_0000598c_00000000_6_CUPA_cpp1_ii_74c599a1
U cudaEventRecord
U cudaFree
U cudaGetDevice
U cudaGetDeviceProperties
U cudaGetErrorString
U cudaLaunch
U cudaMalloc
U cudaMemcpy
(same as above)

Here's an example linux shared object creation along the lines you indicated:
create a shared library containing my CUDA kernels that has a
CUDA-free wrapper/header.
create a test executable for the shared library.
First the shared library. The build commands for this are as follows:
nvcc -arch=sm_20 -Xcompiler '-fPIC' -dc test1.cu test2.cu
nvcc -arch=sm_20 -Xcompiler '-fPIC' -dlink test1.o test2.o -o link.o
g++ -shared -o test.so test1.o test2.o link.o -L/usr/local/cuda/lib64 -lcudart
It seems you may be missing the second step above in your makefile, but I haven't analyzed if there are any other issues with your makefile.
Now, for the test executable, the build commands are as follows:
g++ -c main.cpp
g++ -o testmain main.o test.so
To run it, simply execute the testmain executable, but be sure the test.so library is on your LD_LIBRARY_PATH.
These are the files I used for test purposes:
test1.h:
int my_test_func1();
test1.cu:
#include <stdio.h>
#include "test1.h"
#define DSIZE 1024
#define DVAL 10
#define nTPB 256
#define cudaCheckErrors(msg) \
do { \
cudaError_t __err = cudaGetLastError(); \
if (__err != cudaSuccess) { \
fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
msg, cudaGetErrorString(__err), \
__FILE__, __LINE__); \
fprintf(stderr, "*** FAILED - ABORTING\n"); \
exit(1); \
} \
} while (0)
__global__ void my_kernel1(int *data){
int idx = threadIdx.x + (blockDim.x *blockIdx.x);
if (idx < DSIZE) data[idx] =+ DVAL;
}
int my_test_func1(){
int *d_data, *h_data;
h_data = (int *) malloc(DSIZE * sizeof(int));
if (h_data == 0) {printf("malloc fail\n"); exit(1);}
cudaMalloc((void **)&d_data, DSIZE * sizeof(int));
cudaCheckErrors("cudaMalloc fail");
for (int i = 0; i < DSIZE; i++) h_data[i] = 0;
cudaMemcpy(d_data, h_data, DSIZE * sizeof(int), cudaMemcpyHostToDevice);
cudaCheckErrors("cudaMemcpy fail");
my_kernel1<<<((DSIZE+nTPB-1)/nTPB), nTPB>>>(d_data);
cudaDeviceSynchronize();
cudaCheckErrors("kernel");
cudaMemcpy(h_data, d_data, DSIZE * sizeof(int), cudaMemcpyDeviceToHost);
cudaCheckErrors("cudaMemcpy 2");
for (int i = 0; i < DSIZE; i++)
if (h_data[i] != DVAL) {printf("Results check failed at offset %d, data was: %d, should be %d\n", i, h_data[i], DVAL); exit(1);}
printf("Results check 1 passed!\n");
return 0;
}
test2.h:
int my_test_func2();
test2.cu:
#include <stdio.h>
#include "test2.h"
#define DSIZE 1024
#define DVAL 20
#define nTPB 256
#define cudaCheckErrors(msg) \
do { \
cudaError_t __err = cudaGetLastError(); \
if (__err != cudaSuccess) { \
fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
msg, cudaGetErrorString(__err), \
__FILE__, __LINE__); \
fprintf(stderr, "*** FAILED - ABORTING\n"); \
exit(1); \
} \
} while (0)
__global__ void my_kernel2(int *data){
int idx = threadIdx.x + (blockDim.x *blockIdx.x);
if (idx < DSIZE) data[idx] =+ DVAL;
}
int my_test_func2(){
int *d_data, *h_data;
h_data = (int *) malloc(DSIZE * sizeof(int));
if (h_data == 0) {printf("malloc fail\n"); exit(1);}
cudaMalloc((void **)&d_data, DSIZE * sizeof(int));
cudaCheckErrors("cudaMalloc fail");
for (int i = 0; i < DSIZE; i++) h_data[i] = 0;
cudaMemcpy(d_data, h_data, DSIZE * sizeof(int), cudaMemcpyHostToDevice);
cudaCheckErrors("cudaMemcpy fail");
my_kernel2<<<((DSIZE+nTPB-1)/nTPB), nTPB>>>(d_data);
cudaDeviceSynchronize();
cudaCheckErrors("kernel");
cudaMemcpy(h_data, d_data, DSIZE * sizeof(int), cudaMemcpyDeviceToHost);
cudaCheckErrors("cudaMemcpy 2");
for (int i = 0; i < DSIZE; i++)
if (h_data[i] != DVAL) {printf("Results check failed at offset %d, data was: %d, should be %d\n", i, h_data[i], DVAL); exit(1);}
printf("Results check 2 passed!\n");
return 0;
}
main.cpp:
#include <stdio.h>
#include "test1.h"
#include "test2.h"
int main(){
my_test_func1();
my_test_func2();
return 0;
}
When I compile according to the commands given, and run ./testmain I get:
$ ./testmain
Results check 1 passed!
Results check 2 passed!
Note that if you prefer, you may generate a libtest.so instead of test.so, and then you may use a modified build sequence for the test executable:
g++ -c main.cpp
g++ -o testmain main.o -L. -ltest
I don't believe it makes any difference, but it may be more familiar syntax.
I'm sure there is more than one way to accomplish this. This is just an example.
You may wish to also review the relevant section of the nvcc manual and also review the examples.
EDIT: I tested this under cuda 5.5 RC, and the final application link step complained about not finding the cudart lib (warning: libcudart.so.5.5., needed by ./libtest.so, not found). However the following relatively simple modification (example Makefile) should work under either cuda 5.0 or cuda 5.5.
Makefile:
testmain : main.cpp libtest.so
g++ -c main.cpp
g++ -o testmain -L. -ldl -Wl,-rpath,. -ltest -L/usr/local/cuda/lib64 -lcudart main.o
libtest.so : link.o
g++ -shared -Wl,-soname,libtest.so -o libtest.so test1.o test2.o link.o -L/usr/local/cuda/lib64 -lcudart
link.o : test1.cu test2.cu test1.h test2.h
nvcc -m64 -arch=sm_20 -dc -Xcompiler '-fPIC' test1.cu test2.cu
nvcc -m64 -arch=sm_20 -Xcompiler '-fPIC' -dlink test1.o test2.o -o link.o
clean :
rm -f testmain test1.o test2.o link.o libtest.so main.o

The other answers did not work for me (maybe because I’m using cuda 10).
The solution that worked for me was compiling the cuda files as:
nvcc -dc -o cuda_file.o cuda_file.cu
Than compiling the c++ file as:
g++ -c -o cpp_file.o cpp_file.cpp
And finally linking all using nvcc:
nvcc -o my_prog cpp_file.o cuda_file.o -lcudart -lcuda -L<other stuff>
Don’t take this code literally. But the core of the solution to the error was using nvcc instead of g++ in the final linking step.

Related

Msys2 mingw built libcurl with openssl, but https unsupported

I need to use libcurl with https in my project, so I've built zlib, openssl and libcurl (with mingw32-make mingw32-ssl-zlib) successfully. But after I copied include libcurl.a libcurldll.a libcurl.dll to my project folder, sent a GET request to https://www.google.com it failed with error Unsupported protocol
My OPENSSL_PATH and ZLIB_PATH in my lib/Makefile.m32 src/Makefile.m32 are:
ZLIB_PATH = ../../zlib-1.2.8
OPENSSL_PATH = /c/OpenSSL
and the build log:
Compeador#DESKTOP-QCSNUGN MSYS /e/programming/c/lib/curl-7.74.0
$ mingw32-make mingw32-ssl-zlib
C:/msys64/mingw32/bin/mingw32-make.exe -C lib -f Makefile.m32 CFG=mingw32-ssl-zlib
mingw32-make[1]: Entering directory 'E:/programming/c/lib/curl-7.74.0/lib'
gcc -I. -I../include -I"C:/OpenSSL/include" -I"../../zlib-1.2.8" -g -O2 -Wall -W -fno-strict-aliasing -m32 -DBUILDING_LIBCURL -DUSE_OPENSSL -DHAVE_OPENSSL_PKCS12_H -DOPENSSL_NO_KRB5 -DHAVE_LIBZ -DHAVE_ZLIB_H -c altsvc.c -o altsvc.o
gcc ......
ar cru libcurl.a altsvc.o amigaos.o asyn-ares.o asyn-thread.o base64.o conncache.o connect.o content_encoding.o cookie.o curl_addrinfo.o curl_ctype.o curl_des.o curl_endian.o curl_fnmatch.o curl_get_line.o curl_gethostname.o curl_gssapi.o curl_memrchr.o curl_multibyte.o curl_ntlm_core.o curl_ntlm_wb.o curl_path.o curl_range.o curl_rtmp.o curl_sasl.o curl_sspi.o curl_threads.o dict.o dotdot.o easy.o escape.o file.o fileinfo.o formdata.o ftp.o url.o ftplistparser.o getenv.o getinfo.o gopher.o hash.o hmac.o hostasyn.o hostcheck.o hostip.o hostip4.o hostip6.o hostsyn.o http.o http2.o http_chunks.o http_digest.o http_negotiate.o http_ntlm.o http_proxy.o idn_win32.o if2ip.o imap.o inet_ntop.o inet_pton.o krb5.o ldap.o llist.o md4.o md5.o memdebug.o mime.o mprintf.o mqtt.o multi.o netrc.o non-ascii.o nonblock.o openldap.o parsedate.o pingpong.o pop3.o progress.o psl.o doh.o rand.o rename.o rtsp.o select.o sendf.o setopt.o sha256.o share.o slist.o smb.o smtp.o socketpair.o socks.o socks_gssapi.o socks_sspi.o speedcheck.o splay.o strcase.o strdup.o strerror.o strtok.o strtoofft.o system_win32.o telnet.o tftp.o timeval.o transfer.o urlapi.o version.o warnless.o wildcard.o x509asn1.o dynbuf.o version_win32.o easyoptions.o easygetopt.o hsts.o vauth/cleartext.o vauth/cram.o vauth/digest.o vauth/digest_sspi.o vauth/krb5_gssapi.o vauth/krb5_sspi.o vauth/ntlm.o vauth/ntlm_sspi.o vauth/oauth2.o vauth/spnego_gssapi.o vauth/spnego_sspi.o vauth/vauth.o vtls/bearssl.o vtls/gskit.o vtls/gtls.o vtls/keylog.o vtls/mbedtls.o vtls/mbedtls_threadlock.o vtls/mesalink.o vtls/nss.o vtls/openssl.o vtls/schannel.o vtls/schannel_verify.o vtls/sectransp.o vtls/vtls.o vtls/wolfssl.o vquic/ngtcp2.o vquic/quiche.o vquic/vquic.o vssh/libssh.o vssh/libssh2.o vssh/wolfssh.o
ranlib libcurl.a
strip -g libcurl.a
windres --include-dir=../include -DDEBUGBUILD=0 -O coff -F pe-i386 -i libcurl.rc -o libcurl.res
gcc -s -m32 -shared -o libcurl.dll \
-Wl,--output-def,libcurl.def,--out-implib,libcurldll.a \
altsvc.o amigaos.o asyn-ares.o asyn-thread.o base64.o conncache.o connect.o content_encoding.o cookie.o curl_addrinfo.o curl_ctype.o curl_des.o curl_endian.o curl_fnmatch.o curl_get_line.o curl_gethostname.o curl_gssapi.o curl_memrchr.o curl_multibyte.o curl_ntlm_core.o curl_ntlm_wb.o curl_path.o curl_range.o curl_rtmp.o curl_sasl.o curl_sspi.o curl_threads.o dict.o dotdot.o easy.o escape.o file.o fileinfo.o formdata.o ftp.o url.o ftplistparser.o getenv.o getinfo.o gopher.o hash.o hmac.o hostasyn.o hostcheck.o hostip.o hostip4.o hostip6.o hostsyn.o http.o http2.o http_chunks.o http_digest.o http_negotiate.o http_ntlm.o http_proxy.o idn_win32.o if2ip.o imap.o inet_ntop.o inet_pton.o krb5.o ldap.o llist.o md4.o md5.o memdebug.o mime.o mprintf.o mqtt.o multi.o netrc.o non-ascii.o nonblock.o openldap.o parsedate.o pingpong.o pop3.o progress.o psl.o doh.o rand.o rename.o rtsp.o select.o sendf.o setopt.o sha256.o share.o slist.o smb.o smtp.o socketpair.o socks.o socks_gssapi.o socks_sspi.o speedcheck.o splay.o strcase.o strdup.o strerror.o strtok.o strtoofft.o system_win32.o telnet.o tftp.o timeval.o transfer.o urlapi.o version.o warnless.o wildcard.o x509asn1.o dynbuf.o version_win32.o easyoptions.o easygetopt.o hsts.o vauth/cleartext.o vauth/cram.o vauth/digest.o vauth/digest_sspi.o vauth/krb5_gssapi.o vauth/krb5_sspi.o vauth/ntlm.o vauth/ntlm_sspi.o vauth/oauth2.o vauth/spnego_gssapi.o vauth/spnego_sspi.o vauth/vauth.o vtls/bearssl.o vtls/gskit.o vtls/gtls.o vtls/keylog.o vtls/mbedtls.o vtls/mbedtls_threadlock.o vtls/mesalink.o vtls/nss.o vtls/openssl.o vtls/schannel.o vtls/schannel_verify.o vtls/sectransp.o vtls/vtls.o vtls/wolfssl.o vquic/ngtcp2.o vquic/quiche.o vquic/vquic.o vssh/libssh.o vssh/libssh2.o vssh/wolfssh.o libcurl.res -L"C:/OpenSSL/lib" -lssl -lcrypto -lgdi32 -lcrypt32 -L"../../zlib-1.2.8" -lz -lwldap32 -lws2_32
mingw32-make[1]: Leaving directory 'E:/programming/c/lib/curl-7.74.0/lib'
C:/msys64/mingw32/bin/mingw32-make.exe -C src -f Makefile.m32 CFG=mingw32-ssl-zlib
mingw32-make[1]: Entering directory 'E:/programming/c/lib/curl-7.74.0/src'
windres --include-dir=../include -O coff -DCURL_EMBED_MANIFEST -F pe-i386 -i curl.rc -o curl.res
gcc -I. -I../include -I../lib -I"C:/OpenSSL/include" -I"../../zlib-1.2.8" -g -O2 -Wall -W -fno-strict-aliasing -m32 -DCURL_STATICLIB -DUSE_OPENSSL -DHAVE_LIBZ -DHAVE_ZLIB_H -c slist_wc.c
gcc ......
rm -f curl.exe
gcc -s -m32 -static -o curl.exe curl.res slist_wc.o tool_binmode.o tool_bname.o tool_cb_dbg.o tool_cb_hdr.o tool_cb_prg.o tool_cb_rea.o tool_cb_see.o tool_cb_wrt.o tool_cfgable.o tool_convert.o tool_dirhie.o tool_doswin.o tool_easysrc.o tool_filetime.o tool_formparse.o tool_getparam.o tool_getpass.o tool_help.o tool_helpers.o tool_homedir.o tool_hugehelp.o tool_libinfo.o tool_main.o tool_metalink.o tool_msgs.o tool_operate.o tool_operhlp.o tool_panykey.o tool_paramhlp.o tool_parsecfg.o tool_progress.o tool_strdup.o tool_setopt.o tool_sleep.o tool_urlglob.o tool_util.o tool_vms.o tool_writeout.o tool_writeout_json.o tool_xattr.o strtoofft.o nonblock.o warnless.o curl_ctype.o curl_multibyte.o version_win32.o dynbuf.o -L../lib -lcurl -L"C:/OpenSSL/lib" -lssl -lcrypto -lgdi32 -lcrypt32 -L"../../zlib-1.2.8" -lz -lwldap32 -lws2_32
mingw32-make[1]: Leaving directory 'E:/programming/c/lib/curl-7.74.0/src'
and my code:
#include <iostream>
#include <windows.h>
#include <openssl/ssl.h>
#include <curl/curl.h>
using namespace std;
size_t CurlStrWCallback(char* content, size_t size, size_t nmemb, void *userdata){
auto total = size*nmemb;
((std::string*)userdata)->append((char*)content, total);
return total;
}
int main(){
auto curl = curl_easy_init();
if(curl_global_init(CURL_GLOBAL_ALL) != CURLE_OK){
std::cout << "Fail to init curl\n";
return 0;
}
if(curl){
std::string buffer;
curl_easy_setopt(curl, CURLOPT_URL, "https://www.google.com");
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, CurlStrWCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);
auto res = curl_easy_perform(curl);
if(res != CURLE_OK){
std::cout << "failed to perform request: " << curl_easy_strerror(res) << '\n';
}
else{
std::cout << "Response:\n" << buffer << std::endl;
}
curl_easy_cleanup(curl);
}
curl_global_cleanup();
return 0;
}
output: failed to perform request: Unsupported protocol
meanwhlie I also tried to run libcurl/src/curl.exe https://www.google.com -k and it sucessed
Turns out I have to compile all source in the same fashion I use in Linux: ./configure && make && make install
here are the commands I use:
zlib:
./configure
make && make install
openssl:
./configure --prefix=$PWD/dist no-idea no-mdc2 no-rc5 shared mingw (or mingw64 for 64 bits)
make && make install
libcurl:
./configure --prefix=$PWD/dist --with-zlib=PATH_TO_COMPILED_ZLIB --with-ssl=PATH_TO_COMPILED_OPENSSL --host=i686-w64-mingw32 (or x86_64-w64-mingw32 for 64 bits)
make && make install

Undefined symbol when trying to link with shared library built from CUDA objects

I'm experimenting with building a simple application from a couple of .cu source files and a very simple C++ main that calls a function from one of the .cu files. I'm making a shared library (.so file) from the compiled .cu files. I'm finding that everything builds without trouble, but when I try to run the application, I get a linker undefined symbol error, with the mangled name of the .cu function I'm calling from main(). If I build a static library instead, my application runs just fine. Here's the makefile I've set up:
.PHONY: clean
NVCCFLAGS = -std=c++11 --compiler-options '-fPIC'
CXXFLAGS = -std=c++11
HLIB = libhello.a
SHLIB = libhello.so
CUDA_OBJECTS = bridge.o add.o
all: driver
%.o :: %.cu
nvcc -o $# $(NVCCFLAGS) -c -I. $<
%.o :: %.cpp
c++ $(CXXFLAGS) -o $# -c -I. $<
$(HLIB): $(CUDA_OBJECTS)
ar rcs $# $^
$(SHLIB): $(CUDA_OBJECTS)
nvcc $(NVCCFLAGS) --shared -o $# $^
#driver : driver.o $(HLIB)
# c++ -std=c++11 -fPIC -o $# driver.o -L. -lhello -L/usr/local/cuda-10.1/targets/x86_64-linux/lib -lcudart
driver : driver.o $(SHLIB)
c++ -std=c++11 -fPIC -o $# driver.o -L. -lhello
clean:
-rm -f driver *.o *.so *.a
Here are the various source files that the makefile takes as fodder.
add.cu:
__global__ void add(int n, int* a, int* b, int* c) {
int index = threadIdx.x;
int stride = blockDim.x;
for (int ii = index; ii < n; ii += stride) {
c[ii] = a[ii] + b[ii];
}
}
add.h:
extern __global__ void add(int n, int* a, int* b, int* c);
bridge.cu:
#include <iostream>
#include "add.h"
void bridge() {
int N = 1 << 16;
int blockSize = 256;
int numBlocks = (N + blockSize - 1)/blockSize;
int* a;
int* b;
int* c;
cudaMallocManaged(&a, N*sizeof(int));
cudaMallocManaged(&b, N*sizeof(int));
cudaMallocManaged(&c, N*sizeof(int));
for (int ii = 0; ii < N; ii++) {
a[ii] = ii;
b[ii] = 2*ii;
}
add<<<numBlocks, blockSize>>>(N, a, b, c);
cudaDeviceSynchronize();
for (int ii = 0; ii < N; ii++) {
std::cout << a[ii] << " + " << b[ii] << " = " << c[ii] << std::endl;
}
cudaFree(a);
cudaFree(b);
cudaFree(c);
}
bridge.h:
extern void bridge();
driver.cpp:
#include "bridge.h"
int main() {
bridge();
return 0;
}
I'm very new to cuda, so I expect that's where I'm doing something wrong. I've played a bit with using extern "C" declarations, but that just seems to move the "undefined symbol" error from run time to build time.
I'm familiar with various ways that one can end up with an undefined symbol, and I've mentioned various experiments I've already performed (static linking, extern "C" declarations) that make me think that this problem isn't addressed by the proposed duplicate question.
My unresolved symbol is _Z6bridgev
It looks to me as though the linker should be able resolve the symbol. If I can nm on driver.o, I see:
0000000000000000 T main
U _Z6bridgev
And if I run nm on libhello.so, I see:
0000000000006e56 T _Z6bridgev
When Robert Crovella was able to get my example to work on his machine, while I wasn't able to get his example to work on mine, I started realizing that my problem had nothing to do with cuda or nvcc. It was the fact that with a shared library, the loader has to resolve symbols at runtime, and my shared library wasn't in a "well-known location". I built a simple test case just now, purely with c++ sources, and repeated my failure. Once I copied libhello.so to /usr/local/lib, I was able to run driver successfully. So, I'm OK with closing my original question, if that's the will of the people.

cublasSasum causing segmentation fault in custom TensorFlow op [duplicate]

The code of cublas below give us the errors:core dumped while being at "cublasSnrm2(handle,row,dy,incy,de)",could you give some advice?
main.cu
#include <iostream>
#include "cublas.h"
#include "cublas_v2.h"
#include "helper_cuda.h"
using namespace std;
int main(int argc,char *args[])
{
float y[10] = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0};
int dev=0;
checkCudaErrors(cudaSetDevice(dev));
//cublas init
cublasStatus stat;
cublasInit();
cublasHandle_t handle;
stat = cublasCreate(&handle);
if (stat !=CUBLAS_STATUS_SUCCESS)
{
printf("cublas handle create failed!\n");
cublasShutdown();
}
float * dy,*de,*e;
int incy = 1,ONE = 1,row = 10;
e = (float *)malloc(sizeof(float)*ONE);
e[0]=0.0f;
checkCudaErrors(cudaMalloc(&dy,sizeof(float)*row));
checkCudaErrors(cudaMalloc(&de,sizeof(float)*ONE));
checkCudaErrors(cudaMemcpy(dy,y,row*sizeof(float),cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(de,e,ONE*sizeof(float),cudaMemcpyHostToDevice));
stat = cublasSnrm2(handle,row,dy,incy,de);
if (stat !=CUBLAS_STATUS_SUCCESS)
{
printf("norm2 compute failed!\n");
cublasShutdown();
}
checkCudaErrors(cudaMemcpy(e,de,ONE*sizeof(float),cudaMemcpyDeviceToHost));
std::cout<<e[0]<<endl;
return 0;
}
makefile is below:
NVIDIA = $(HOME)/NVIDIA_CUDA-5.0_Samples
CUDA = /usr/local/cuda-5.0
NVIDINCADD = -I$(NVIDIA)/common/inc
CUDAINCADD = -I$(CUDA)/include
CC = -L/usr/lib64/ -lstdc++
GCCOPT = -O2 -fno-rtti -fno-exceptions
INTELOPT = -O3 -fno-rtti -xW -restrict -fno-alias
DEB = -g
NVCC = -G
ARCH = -arch=sm_35
bcg:main.cu
nvcc $(DEB) $(NVCC) $(ARCH) $(CC) -lm $(NVIDINCADD) $(CUDAINCADD) -lcublas -I./ -o $(#) $(<)
clean:
rm -f bcg
rm -f hyb
My OS is linux redhat 6.2,CUDA's version is 5.0, GPU is K20M.
The problem is here:
cublasSnrm2(handle,row,dy,incy,de);
By default, the last parameter is a host pointer. So either pass e to the snrm2 call rather than de or do this:
cublasSetPointerMode(handle,CUBLAS_POINTER_MODE_DEVICE);
stat = cublasSnrm2(handle,row,dy,incy,de);
The pointer mode needs to be set to device if you want to pass a device pointer to store the result.

The executable test file is not running the tests

I have created a project:
// func.h :
#ifndef FUNCS_H_
#define FUNCS_H_
int addInts(int i, int j);
#endif /* FUNCS_H_ */
// func.cpp
#include "func.h"
int addInts(int i, int j) {
return i + j;
}
// main.cpp
#include "func.h"
#include <iostream>
int main() {
std::cout << addInts(5, 7) << std::endl;
return 0;
}
//makefile
OBJS := \
main.o \
func.o
CXX := g++
CXX_FLAGS := -Wall -Werror -Wextra -std=c++11
all: main_prj
main_prj: $(OBJS)
$(CXX) $(OBJS) -lm -o main
-include $(OBJS:.o=.d)
%.o: %.cpp
$(CXX) -c $(CXX_FLAGS) $*.cpp -o $*.o
clean:
rm -f main $(OBJS)
And I created also a test (boost test) for that function:
// test/main_test.cpp
#define BOOST_TEST_MODULE main_test
#include <boost/test/included/unit_test.hpp>
//____________________________________________________________________________//
// FIXTURES ...
//____________________________________________________________________________//
// test/addInts/addInts_test.cpp
#include <boost/test/unit_test.hpp>
#include "../../func.h"
BOOST_AUTO_TEST_CASE(addInts_5_7) {
int addInts_5_7 = 5 + 7;
BOOST_CHECK_EQUAL(addInts_5_7, addInts(5, 7));
}
// test/makefile
OBJS_TEST := \
main_test.o \
addInts/addInts_test.o \
../func.o
CXX_TEST := g++
CXX_FLAGS_TEST := -Wall -Werror -Wextra -std=c++11
all_test: main_test_prj
main_test_prj: $(OBJS_TEST)
$(CXX_TEST) $(OBJS_TEST) -lm -o main_test
-include $(OBJS_TEST:.o=.d)
%.o: %.cpp
$(CXX_TEST) -c $(CXX_FLAGS_TEST) $*.cpp -o $*.o
clean_test:
rm -f main_test $(OBJS_TEST)
Now, the make commands work, so they clean all the created files, or create the o files and the executables. When I run the main file, the output is correct: 12 ; but when I run the main_test file, the output is a little strange:
Running 1 test case...
*** No errors detected
I would expect the output with running tests and OK/KO, or pass/not pass... I cannot figure what am I doing wrong. Can anyone help me, please?
You have one test.
The output says that one test was run, and that no errors were found.
This output appears to be as documented.
There is no problem here.
Though, sadly, the documentation on how to change this output is rather lacking…

Make file Linking issue Undefined symbols for architecture x86_64

I am working on getting a few files to link together using my make file and c++ and am getting the following error when running make.
g++ -bind_at_load `pkg-config --cflags opencv` -c -o compute_gist.o compute_gist.cpp
g++ -bind_at_load `pkg-config --cflags opencv` -c -o gist.o gist.cpp
g++ -bind_at_load `pkg-config --cflags opencv` -c -o standalone_image.o standalone_image.cpp
g++ -bind_at_load `pkg-config --cflags opencv` -c -o IplImageConverter.o IplImageConverter.cpp
g++ -bind_at_load `pkg-config --cflags opencv` -c -o GistCalculator.o GistCalculator.cpp
g++ -bind_at_load `pkg-config --cflags opencv` `pkg-config --libs opencv` compute_gist.o gist.o standalone_image.o IplImageConverter.o GistCalculator.o -o rungist
Undefined symbols for architecture x86_64:
"color_gist_scaletab(color_image_t*, int, int, int const*)", referenced from:
_main in compute_gist.o
ld: symbol(s) not found for architecture x86_64
collect2: ld returned 1 exit status
make: *** [rungist] Error 1
My makefile is as follows (Note, I don't need opencv bindings yet, but will be coding in opencv later.
CXX = g++
CXXFLAGS = -bind_at_load `pkg-config --cflags opencv`
LFLAGS = `pkg-config --libs opencv`
SRC = \
compute_gist.cpp \
gist.cpp \
standalone_image.cpp \
IplImageConverter.cpp \
GistCalculator.cpp
OBJS = $(SRC:.cpp=.o)
rungist: $(OBJS)
$(CXX) $(CXXFLAGS) $(LFLAGS) $(OBJS) -o $#
all: rungist
clean:
rm -rf $(OBJS) rungist
The method header is located in gist.h
float *color_gist_scaletab(color_image_t *src, int nblocks, int n_scale, const int *n_orientations);
And the method is defined in gist.cpp
float *color_gist_scaletab(color_image_t *src, int w, int n_scale, const int *n_orientation) {
And finally the compute_gist.cpp (main file)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gist.h"
static color_image_t *load_ppm(const char *fname) {
FILE *f=fopen(fname,"r");
if(!f) {
perror("could not open infile");
exit(1);
}
int width,height,maxval;
if(fscanf(f,"P6 %d %d %d",&width,&height,&maxval)!=3 || maxval!=255) {
fprintf(stderr,"Error: input not a raw PPM with maxval 255\n");
exit(1);
}
fgetc(f); /* eat the newline */
color_image_t *im=color_image_new(width,height);
int i;
for(i=0;i<width*height;i++) {
im->c1[i]=fgetc(f);
im->c2[i]=fgetc(f);
im->c3[i]=fgetc(f);
}
fclose(f);
return im;
}
static void usage(void) {
fprintf(stderr,"compute_gist options... [infilename]\n"
"infile is a PPM raw file\n"
"options:\n"
"[-nblocks nb] use a grid of nb*nb cells (default 4)\n"
"[-orientationsPerScale o_1,..,o_n] use n scales and compute o_i orientations for scale i\n"
);
exit(1);
}
int main(int argc,char **args) {
const char *infilename="/dev/stdin";
int nblocks=4;
int n_scale=3;
int orientations_per_scale[50]={8,8,4};
while(*++args) {
const char *a=*args;
if(!strcmp(a,"-h")) usage();
else if(!strcmp(a,"-nblocks")) {
if(!sscanf(*++args,"%d",&nblocks)) {
fprintf(stderr,"could not parse %s argument",a);
usage();
}
} else if(!strcmp(a,"-orientationsPerScale")) {
char *c;
n_scale=0;
for(c=strtok(*++args,",");c;c=strtok(NULL,",")) {
if(!sscanf(c,"%d",&orientations_per_scale[n_scale++])) {
fprintf(stderr,"could not parse %s argument",a);
usage();
}
}
} else {
infilename=a;
}
}
color_image_t *im=load_ppm(infilename);
//Here's the method call -> :(
float *desc=color_gist_scaletab(im,nblocks,n_scale,orientations_per_scale);
int i;
int descsize=0;
//compute descriptor size
for(i=0;i<n_scale;i++)
descsize+=nblocks*nblocks*orientations_per_scale[i];
descsize*=3; // color
//print descriptor
for(i=0;i<descsize;i++)
printf("%.4f ",desc[i]);
printf("\n");
free(desc);
color_image_delete(im);
return 0;
}
Any help would be greatly appreciated. I hope this is enough info. Let me know if I need to add more.
I suspect that color_gist_scaletab should be declared as extern "C" in your header file:
extern "C" {
float *color_gist_scaletab(color_image_t *src, int nblocks, int n_scale, const int *n_orientations);
}
Your link command line is incorrect. See this answer.
However, that is likely not the problem you are seeing here.
What do the following commands print?
file gist.o
nm gist.o | grep color_gist_scaletab
I also see this very similar question. But there, color_gist_scaletab comes from gist.c, not gist.cpp. You didn't just rename gist.c to gist.cpp, did you? Don't do that.