I would like to use cuda within a ros package. Has anyone a simple example for me?
I tried to built a static library with the cuda function and add this library to my package, but I get always a linking error: Undefined reference cuda...
I have built a executable instead of the library and it works.
Please help!
I found a solution myself:
CMakeLists.txt:
cmake_minimum_required(VERSION 2.8.3)
PROJECT (beginner_tutorials)
FIND_PACKAGE(CUDA REQUIRED)
find_package(catkin REQUIRED COMPONENTS
roscpp
rospy
std_msgs
)
SET(CUDA_NVCC_FLAGS "-arch=sm_13" CACHE STRING "nvcc flags" FORCE)
SET (CUDA_VERBOSE_BUILD ON CACHE BOOL "nvcc verbose" FORCE)
SET(LIB_TYPE STATIC)
CUDA_ADD_LIBRARY(TestLib ${LIB_TYPE} src/helloWorld.cu)
catkin_package(
)
include_directories(
${catkin_INCLUDE_DIRS}
)
ADD_EXECUTABLE(beginner_tutorials_node src/main.cpp)
ADD_DEPENDENCIES(beginner_tutorials_node TestLib)
TARGET_LINK_LIBRARIES(beginner_tutorials_node
${catkin_LIBRARIES}
${PCL_LIBRARIES}
TestLib
)
main.cpp:
int testmain();
int main()
{
testmain();
return 0;
}
helloWorld.cu:
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>
const int N = 7;
const int blocksize = 7;
__global__
void hello(char *a, int *b)
{
a[threadIdx.x] += b[threadIdx.x];
}
int testmain()
{
char a[N] = "Hello ";
int b[N] = {15, 10, 6, 0, -11, 1, 0};
char *ad;
int *bd;
const int csize = N*sizeof(char);
const int isize = N*sizeof(int);
printf("%s", a);
cudaMalloc( (void**)&ad, csize );
cudaMalloc( (void**)&bd, isize );
cudaMemcpy( ad, a, csize, cudaMemcpyHostToDevice );
cudaMemcpy( bd, b, isize, cudaMemcpyHostToDevice );
dim3 dimBlock( blocksize, 1 );
dim3 dimGrid( 1, 1 );
hello<<<dimGrid, dimBlock>>>(ad, bd);
cudaMemcpy( a, ad, csize, cudaMemcpyDeviceToHost );
cudaFree( ad );
printf("%s\n", a);
return EXIT_SUCCESS;
}
If you are using catkin-simple to make your CMake file, you can use this CMakeLists.txt:
cmake_minimum_required(VERSION 2.8.3)
project(cuda_test)
set(CMAKE_CUDA_COMPILER /usr/local/cuda-9.1/bin/nvcc)
find_package(catkin_simple REQUIRED)
find_package(CUDA REQUIRED)
catkin_simple()
#Here you can set any ncvv compiler flags, if you so wish
#SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -DMY_DEF=1")
#Here you can set any gcc/cmake compiler flags, if you so wish
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -O3")
#Add all of your sources here
cuda_add_executable(
cuda_test_cu
src/hello.cu
)
#Link the executable to the necessary libs
target_link_libraries(
cuda_test_cu
${catkin_LIBRARIES}
${CUDA_LIBRARIES}
)
# CMake Indexing
FILE(GLOB_RECURSE LibFiles "include/*")
add_custom_target(headers SOURCES ${LibFiles})
cs_install()
I use this and find that it works just fine.
Related
I want to compile a program using CMake in Project/build/ directory having a following organization:
---Project/
---CMakeLists.txt
---main/
---main.f90
---library/
---CMakeLists.txt
---call_ts_module.f90
---call_ts.cpp
---call_ts.h
---torchscript.pt
---libtorch/
---<libtorch libraries>
---build/
The main Fortran program Project/main/main.f90 calls call_ts_subroutine defined in module Project/library/call_ts_module.f90, which calls a c++ function invariant_nn defined in Project/library/call_ts.cpp and Project/library/call_ts.h, which invoke a torchscript Project/library/torchscript.pt.
How should Project/CMakeLists.txt and Project/library/CMakeLists.txt look like? I tried following ones (I am totally new to CMake), but they did not work (make does not see the header file call_ts_export.h):
Project/CMakeLists.txt:
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
project(fortran_calls_ts CXX Fortran)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
endif()
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
set(CMAKE_Fortran_FLAGS_RELEASE "-O3")
find_package(Torch REQUIRED)
add_subdirectory(library)
add_executable(fortran_calls_ts.x main/main.f90)
target_link_libraries(fortran_calls_ts.x library)
set_property(TARGET fortran_calls_ts.x PROPERTY CXX_STANDARD 14)
Project/library/CMakeLists.txt:
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
add_library(library SHARED call_ts_module.f90)
add_library(call_ts_cpp SHARED call_ts.cpp)
target_link_libraries(library call_ts_cpp)
target_link_libraries(call_ts_cpp "${TORCH_LIBRARIES}")
include(GenerateExportHeader)
generate_export_header(call_ts_cpp BASE_NAME call_ts)
install(TARGETS call_ts_cpp LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)
install(FILES call_ts.h ${PROJECT_BINARY_DIR}/call_ts_export.h DESTINATION include)
Project/main/main.f90
program main
use call_ts_module
...
call call_ts_subroutine()
...
end program main
Project/library/call_ts_module.f90
module call_ts_module
contains
subroutine call_ts_subroutine()
use, intrinsic :: iso_c_binding, only: c_float
implicit none
interface invariant_nn
subroutine invariant_nn(I, G) bind (c)
import :: c_float
real(c_float) :: I(5)
real(c_float) :: G(10)
end subroutine
end interface
real(4) I(5), G(10)
...
call invariant_nn(I, G)
...
end subroutine call_ts_subroutine
end module call_ts_module
Project/library/call_ts.cpp
#include "call_ts.h"
#include <torch/script.h>
#include <iostream>
#include <memory>
void invariant_nn(float I[], float G[])
{
torch::jit::script::Module module;
const char *arg;
arg = "../library/torchscript.pt";
module = torch::jit::load(arg);
std::vector<torch::jit::IValue> inputs;
float data[] = {I[0], I[1], I[2], I[3], I[4]};
inputs.push_back(torch::from_blob(data, {1, 5}));
at::Tensor outputs = module.forward(inputs).toTensor();
for (int k = 0; k < 10; k++) {
G[k] = outputs[0][k].item().to<float>();
}
return;
}
Project/library/call_ts.h
#pragma once
#ifndef CALL_TS_API
#include "call_ts_export.h"
#define CALL_TS_API CALL_TS_EXPORT
#endif
#ifdef __cplusplus
extern "C" {
#endif
CALL_TS_API
void invariant_nn(float I[], float G[]);
#ifdef __cplusplus
}
#endif
I am trying to do the ros_wiki tutorial to convert ROS images to OpenCV images, but when I try to compile my catkin workspace I get several pages of errors that all have the following form:
CMakeFiles/image_converter.dir/scripts/image_converter.cpp.o: In function `cv::String::~String()':
image_converter.cpp:(.text._ZN2cv6StringD2Ev[_ZN2cv6StringD5Ev]+0x14): undefined reference to `cv::String::deallocate()'
I've googled around a lot and found various similar problems, but those problems were all fixed by updating the CMakeLists.txt file and the package.xml file, which did not work for me.
Here is an example of one such solution - they all center around including
find_package( OpenCV REQUIRED )
include_directories( ${OpenCV_INCLUDE_DIRS} )
target_link_libraries( your_target ${OpenCV_LIBS} )
in the cmakelists, but I have those already.
My C++ code:
#include <ros/ros.h>
#include <cv_bridge/cv_bridge.h>
#include <image_transport/image_transport.h>
#include <sensor_msgs/image_encodings.h>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/core/mat.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <string>
#include <iostream>
const std::string wn = "OCV_window";
class ImageConverter
{
ros::NodeHandle nh_;
image_transport::ImageTransport it_;
image_transport::Subscriber image_sub_;
image_transport::Publisher image_pub_;
public:
ImageConverter()
: it_(nh_)
{
image_sub_ = it_.subscribe("/camera/color/image_raw", 1, &ImageConverter::imageCb, this);
image_pub_ = it_.advertise("/image_editor/output_image", 1);
cv::namedWindow(wn);
}
~ImageConverter()
{
cv::destroyWindow(wn);
}
void imageCb(const sensor_msgs::ImageConstPtr& incoming_message)
{
cv_bridge::CvImagePtr cvi;
try
{
cvi = cv_bridge::toCvCopy(incoming_message, sensor_msgs::image_encodings::RGB8);
}
catch (cv_bridge::Exception& e)
{
ROS_ERROR("CV_Bridge Exception: %s", e.what());
return;
}
cv::imshow(wn, cvi->image);
cv::waitKey(3);
image_pub_.publish(cvi->toImageMsg());
}
};
int main(int argc, char** argv)
{
ros::init(argc,argv, "Image_Converter");
ImageConverter ic;
ros::spin();
return(0);
}
My cmakelists file:
cmake_minimum_required(VERSION 2.8.3)
project(odom_reporter)
add_compile_options(-std=c++11)
find_package(catkin REQUIRED COMPONENTS
geometry_msgs
nav_msgs
roscpp
rospy
std_msgs
message_generation
image_transport
cv_bridge
sensor_msgs
OpenCV REQUIRED
)
add_service_files(
FILES
HMD.srv
)
catkin_package()
include_directories(
${OpenCV_INCLUDE_DIRS}
${catkin_INCLUDE_DIRS}
)
add_executable (image_converter scripts/image_converter.cpp)
target_link_libraries (image_converter ${OpenCV_LIBS})
My package.xml file:
<?xml version="1.0"?>
<package format="2">
<name>odom_reporter</name>
<version>0.0.0</version>
<description>The odom_reporter package</description>
<maintainer email="calabrnb#mail.uc.edu">Nate Calabrese</maintainer>
<license>BSD</license>
<buildtool_depend>catkin</buildtool_depend>
<build_depend>geometry_msgs</build_depend>
<build_depend>nav_msgs</build_depend>
<build_depend>roscpp</build_depend>
<build_depend>rospy</build_depend>
<build_depend>std_msgs</build_depend>
<build_depend>message_generation</build_depend>
<build_depend>image_transport</build_depend>
<build_depend>cv_bridge</build_depend>
<build_depend>sensor_msgs</build_depend>
<build_depend>OpenCV</build_depend>
<build_export_depend>geometry_msgs</build_export_depend>
<build_export_depend>nav_msgs</build_export_depend>
<build_export_depend>roscpp</build_export_depend>
<build_export_depend>rospy</build_export_depend>
<build_export_depend>std_msgs</build_export_depend>
<build_export_depend>message_runtime</build_export_depend>
<build_export_depend>image_transport</build_export_depend>
<build_export_depend>cv_bridge</build_export_depend>
<build_export_depend>sensor_msgs</build_export_depend>
<build_export_depend>OpenCV</build_export_depend>
<exec_depend>geometry_msgs</exec_depend>
<exec_depend>nav_msgs</exec_depend>
<exec_depend>roscpp</exec_depend>
<exec_depend>rospy</exec_depend>
<exec_depend>std_msgs</exec_depend>
<exec_depend>message_runtime</exec_depend>
<exec_depend>image_transport</exec_depend>
<exec_depend>cv_bridge</exec_depend>
<exec_depend>sensor_msgs</exec_depend>
<exec_depend>OpenCV</exec_depend>
<export>
</export>
</package>
That should work, but there's a problem: find_package() only supports finding one package per call.
You're currently trying to find two packages with it, and not finding the second one, OpenCV:
find_package(catkin REQUIRED COMPONENTS
image_transport
cv_bridge
...
)
find_package(OpenCV REQUIRED)
find_package(<package> [version] [EXACT] [QUIET] [MODULE]
[REQUIRED] [[COMPONENTS] [components...]]
[OPTIONAL_COMPONENTS components...]
[NO_POLICY_SCOPE])
I'm working with a slam system, i've install dso, which the code can be seen here::
https://github.com/JakobEngel/dso
Everything works fine, I manage to compile and run without errors. But know I want to parallelize the code, using CUDA. I'm having lot's of trouble adapting it's CMakeLists.txt in order to be able to use CUDA. The original CMakeLists from dso is available here:
dso CMakeLists.txt
I'm trying to adapt it basing my changes on this implementation of another author on another SLAM system:
ORB SLAM 2 CMakeLists.txt using CUDA
Right now my CMakeLists, with my changes (not working), is like this:
SET(PROJECT_NAME DSO)
PROJECT(${PROJECT_NAME})
CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
#set(CMAKE_VERBOSE_MAKEFILE ON)
set(BUILD_TYPE Release)
#set(BUILD_TYPE RelWithDebInfo)
set(EXECUTABLE_OUTPUT_PATH bin)
set(LIBRARY_OUTPUT_PATH lib)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
# required libraries
#SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "/usr/include")
find_package(SuiteParse REQUIRED)
find_package(Eigen3 REQUIRED)
find_package(Boost)
# optional libraries
find_package(LibZip QUIET)
find_package(Pangolin 0.2 QUIET)
find_package(OpenCV QUIET)
#find_package(OpenACC)
# flags
add_definitions("-DENABLE_SSE")
set(CMAKE_CXX_FLAGS
"${SSE_FLAGS} -O3 -g -std=c++11"
)
set(CMAKE_C_FLAGS
"${SSE_FLAGS} -O3 -g -std=c++11"
)
#LIST(APPEND CMAKE_C_FLAGS "-Wall -Wextra -DUSE_NVTX") <<<< Error: doesn't recognize -Wall -Wextra
#LIST(APPEND CMAKE_CXX_FLAGS "-Wall -Wextra -DUSE_NVTX") << Error: doesn't recognize -Wall -Wextra
find_package(CUDA REQUIRED)
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
SET(CUDA_HOST_COMPILER /usr/bin/g++)
LIST(APPEND CUDA_NVCC_FLAGS "--compiler-options -fno-strict-aliasing -use_fast_math -ccbin gcc-5")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++11")
if (MSVC)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
endif (MSVC)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY lib)
cuda_include_directories(
${CUDA_TOOLKIT_ROOT_DIR}/samples/common/inc
)
# Sources files
set(dso_SOURCE_FILES
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystem.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystemOptimize.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystemOptPoint.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystemDebugStuff.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystemMarginalize.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/Residuals.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/CoarseTracker.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/CoarseInitializer.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/ImmaturePoint.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/HessianBlocks.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/PixelSelector2.cpp
${PROJECT_SOURCE_DIR}/src/OptimizationBackend/EnergyFunctional.cpp
${PROJECT_SOURCE_DIR}/src/OptimizationBackend/AccumulatedTopHessian.cpp
${PROJECT_SOURCE_DIR}/src/OptimizationBackend/AccumulatedSCHessian.cpp
${PROJECT_SOURCE_DIR}/src/OptimizationBackend/EnergyFunctionalStructs.cpp
${PROJECT_SOURCE_DIR}/src/util/settings.cpp
${PROJECT_SOURCE_DIR}/src/util/Undistort.cpp
${PROJECT_SOURCE_DIR}/src/util/globalCalib.cpp
)
include_directories(
${PROJECT_SOURCE_DIR}/src
${PROJECT_SOURCE_DIR}/thirdparty/Sophus
${PROJECT_SOURCE_DIR}/thirdparty/sse2neon
${EIGEN3_INCLUDE_DIR}
)
# decide if we have pangolin
if (Pangolin_FOUND)
message("--- found PANGOLIN, compiling dso_pangolin library.")
include_directories( ${Pangolin_INCLUDE_DIRS} )
set(dso_pangolin_SOURCE_FILES
${PROJECT_SOURCE_DIR}/src/IOWrapper/Pangolin/KeyFrameDisplay.cpp
${PROJECT_SOURCE_DIR}/src/IOWrapper/Pangolin/PangolinDSOViewer.cpp)
set(HAS_PANGOLIN 1)
else ()
message("--- could not find PANGOLIN, not compiling dso_pangolin library.")
message(" this means there will be no 3D display / GUI available for dso_dataset.")
set(dso_pangolin_SOURCE_FILES )
set(HAS_PANGOLIN 0)
endif ()
# decide if we have openCV
if (OpenCV_FOUND)
message("--- found OpenCV, compiling dso_opencv library.")
include_directories( ${OpenCV_INCLUDE_DIRS} )
set(dso_opencv_SOURCE_FILES
${PROJECT_SOURCE_DIR}/src/IOWrapper/OpenCV/ImageDisplay_OpenCV.cpp
${PROJECT_SOURCE_DIR}/src/IOWrapper/OpenCV/ImageRW_OpenCV.cpp)
set(HAS_OPENCV 1)
else ()
message("--- could not find OpenCV, not compiling dso_opencv library.")
message(" this means there will be no image display, and image read / load functionality.")
set(dso_opencv_SOURCE_FILES
${PROJECT_SOURCE_DIR}/src/IOWrapper/ImageDisplay_dummy.cpp
${PROJECT_SOURCE_DIR}/src/IOWrapper/ImageRW_dummy.cpp)
set(HAS_OPENCV 0)
endif ()
# decide if we have ziplib.
if (LIBZIP_LIBRARY)
message("--- found ziplib (${LIBZIP_VERSION}), compiling with zip capability.")
add_definitions(-DHAS_ZIPLIB=1)
include_directories( ${LIBZIP_INCLUDE_DIR_ZIP} ${LIBZIP_INCLUDE_DIR_ZIPCONF} )
else()
message("--- not found ziplib (${LIBZIP_LIBRARY}), compiling without zip capability.")
set(LIBZIP_LIBRARY "")
endif()
# compile main library.
include_directories( ${CSPARSE_INCLUDE_DIR} ${CHOLMOD_INCLUDE_DIR})
cuda_add_library(dso SHARED ${dso_SOURCE_FILES} ${dso_opencv_SOURCE_FILES} ${dso_pangolin_SOURCE_FILES}
${PROJECT_SOURCE_DIR}/src/teste.cu
)
#set_property( TARGET dso APPEND_STRING PROPERTY COMPILE_FLAGS -Wall )
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") # OSX
set(BOOST_THREAD_LIBRARY boost_thread-mt)
else()
set(BOOST_THREAD_LIBRARY boost_thread)
endif()
# build main executable (only if we have both OpenCV and Pangolin)
if (OpenCV_FOUND AND Pangolin_FOUND)
message("--- compiling dso_dataset.")
add_executable(dso_dataset ${PROJECT_SOURCE_DIR}/src/main_dso_pangolin.cpp)
target_link_libraries(dso_dataset dso boost_system cxsparse ${BOOST_THREAD_LIBRARY} ${LIBZIP_LIBRARY} ${Pangolin_LIBRARIES} ${OpenCV_LIBS})
else()
message("--- not building dso_dataset, since either don't have openCV or Pangolin.")
endif()
unset(CMAKE_RUNTIME_OUTPUT_DIRECTORY)
So, 'main_dso_pangolin.cpp' is my main file. At this point, with only this changes the code compiles. But i wanted to try if i was able to make some CUDA code. In order to do this I created a 'teste.cu' file, that has the same code as one of the cuda samples, like this:
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
// CUDA runtime
#include </usr/local/cuda-9.0/include/cuda_runtime.h>
#include <cuda.h>
// helper functions and utilities to work with CUDA
#include </usr/local/cuda-9.0/samples/common/inc/helper_functions.h>
#include </usr/local/cuda-9.0/samples/common/inc/helper_cuda.h>
__global__ static void timedReduction(const float *input, float *output, clock_t *timer)
{
// __shared__ float shared[2 * blockDim.x];
extern __shared__ float shared[];
const int tid = threadIdx.x;
const int bid = blockIdx.x;
if (tid == 0) timer[bid] = clock();
// Copy input.
shared[tid] = input[tid];
shared[tid + blockDim.x] = input[tid + blockDim.x];
// Perform reduction to find minimum.
for (int d = blockDim.x; d > 0; d /= 2)
{
__syncthreads();
if (tid < d)
{
float f0 = shared[tid];
float f1 = shared[tid + d];
if (f1 < f0)
{
shared[tid] = f1;
}
}
}
// Write result.
if (tid == 0) output[bid] = shared[0];
__syncthreads();
if (tid == 0) timer[bid+gridDim.x] = clock();
}
#define NUM_BLOCKS 64
#define NUM_THREADS 256
void xx(int argc, char** argv){
printf("CUDA Clock sample\n");
// This will pick the best possible CUDA capable device
int dev = findCudaDevice(argc, (const char **)argv);
float *dinput = NULL;
float *doutput = NULL;
clock_t *dtimer = NULL;
clock_t timer[NUM_BLOCKS * 2];
float input[NUM_THREADS * 2];
for (int i = 0; i < NUM_THREADS * 2; i++)
{
input[i] = (float)i;
}
checkCudaErrors(cudaMalloc((void **)&dinput, sizeof(float) * NUM_THREADS * 2));
checkCudaErrors(cudaMalloc((void **)&doutput, sizeof(float) * NUM_BLOCKS));
checkCudaErrors(cudaMalloc((void **)&dtimer, sizeof(clock_t) * NUM_BLOCKS * 2));
checkCudaErrors(cudaMemcpy(dinput, input, sizeof(float) * NUM_THREADS * 2, cudaMemcpyHostToDevice));
timedReduction<<<NUM_BLOCKS, NUM_THREADS, sizeof(float) * 2 *NUM_THREADS>>>(dinput, doutput, dtimer);
checkCudaErrors(cudaMemcpy(timer, dtimer, sizeof(clock_t) * NUM_BLOCKS * 2, cudaMemcpyDeviceToHost));
checkCudaErrors(cudaFree(dinput));
checkCudaErrors(cudaFree(doutput));
checkCudaErrors(cudaFree(dtimer));
long double avgElapsedClocks = 0;
for (int i = 0; i < NUM_BLOCKS; i++)
{
avgElapsedClocks += (long double) (timer[i + NUM_BLOCKS] - timer[i]);
}
avgElapsedClocks = avgElapsedClocks/NUM_BLOCKS;
printf("Average clocks/block = %Lf\n", avgElapsedClocks);
}
And in my main, the first thing i do is to call this function. This time, when i do 'cmake' and 'make i get errors like:
/home/cesar/Documents/dso/src/teste.cu:18:21: error: ‘threadIdx’ was not declared in this scope
const int tid = threadIdx.x;
/home/cesar/Documents/dso/src/teste.cu:19:21: error: ‘blockIdx’ was not declared in this scope
const int bid = blockIdx.x;
I've install CUDA Toolkit correctly, but here is the version:
cesar#cesar-X550JX:/usr/local/cuda/bin$ /usr/local/cuda/bin/nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2017 NVIDIA Corporation
Built on Fri_Sep__1_21:08:03_CDT_2017
Cuda compilation tools, release 9.0, V9.0.176
What do you think i'm doing wrong or i'm missing? I'm having many difficulties adapting CMakeLists.txt due to its complexity and well defined structure.
--- EDIT ---
Running with make -j VERBOSE=1 i get this messages which tells me that a regular c++ compiler is being used:
/usr/bin/c++ -fPIC -O3 -g -std=c++11 -D_FORCE_INLINES -shared -Wl,-soname,libdso.so -o lib/libdso.so CMakeFiles/dso.dir/src/FullSystem/FullSystem.cpp.o CMakeFiles/dso.dir/src/FullSystem/FullSystemOptimize.cpp.o CMakeFiles/dso.dir/src/FullSystem/FullSystemOptPoint.cpp.o CMakeFiles/dso.dir/src/FullSystem/FullSystemDebugStuff.cpp.o CMakeFiles/dso.dir/src/FullSystem/FullSystemMarginalize.cpp.o CMakeFiles/dso.dir/src/FullSystem/Residuals.cpp.o CMakeFiles/dso.dir/src/FullSystem/CoarseTracker.cpp.o CMakeFiles/dso.dir/src/FullSystem/CoarseInitializer.cpp.o CMakeFiles/dso.dir/src/FullSystem/ImmaturePoint.cpp.o CMakeFiles/dso.dir/src/FullSystem/HessianBlocks.cpp.o CMakeFiles/dso.dir/src/FullSystem/PixelSelector2.cpp.o CMakeFiles/dso.dir/src/OptimizationBackend/EnergyFunctional.cpp.o CMakeFiles/dso.dir/src/OptimizationBackend/AccumulatedTopHessian.cpp.o CMakeFiles/dso.dir/src/OptimizationBackend/AccumulatedSCHessian.cpp.o CMakeFiles/dso.dir/src/OptimizationBackend/EnergyFunctionalStructs.cpp.o CMakeFiles/dso.dir/src/util/settings.cpp.o CMakeFiles/dso.dir/src/util/Undistort.cpp.o CMakeFiles/dso.dir/src/util/globalCalib.cpp.o CMakeFiles/dso.dir/src/IOWrapper/OpenCV/ImageDisplay_OpenCV.cpp.o CMakeFiles/dso.dir/src/IOWrapper/OpenCV/ImageRW_OpenCV.cpp.o CMakeFiles/dso.dir/src/IOWrapper/Pangolin/KeyFrameDisplay.cpp.o CMakeFiles/dso.dir/src/IOWrapper/Pangolin/PangolinDSOViewer.cpp.o CMakeFiles/dso.dir/src/dso_generated_teste.cu.o /usr/local/cuda/lib64/libcudart_static.a -lpthread -ldl -lrt
[ 96%] Building CXX object CMakeFiles/dso_dataset.dir/src/main_dso_pangolin.cpp.o
/usr/bin/c++ -DENABLE_SSE -DHAS_ZIPLIB=1 -I/usr/include/opencv -I/home/cesar/Documents/dso/src -I/home/cesar/Documents/dso/thirdparty/Sophus -I/home/cesar/Documents/dso/thirdparty/sse2neon -I/usr/include/eigen3 -I/home/cesar/Documents/Pangolin/include -I/home/cesar/Documents/Pangolin/build/src/include -I/usr/local/include -I/usr/include/suitesparse -I/usr/local/cuda/include -O3 -g -std=c++11 -D_FORCE_INLINES -o CMakeFiles/dso_dataset.dir/src/main_dso_pangolin.cpp.o -c /home/cesar/Documents/dso/src/main_dso_pangolin.cpp
I also tried to separate .cpp files from .cu files, used add_library for .cpp and cuda_add_library for .cu files, like this:
add_library(dso ${dso_SOURCE_FILES} ${dso_opencv_SOURCE_FILES} ${dso_pangolin_SOURCE_FILES})
cuda_add_library(my_cuda_lib ${PROJECT_SOURCE_DIR}/src/teste.cu)
And then use my_cuda_lib in target_link_libraries, like this:
target_link_libraries(dso_dataset dso boost_system cxsparse ${BOOST_THREAD_LIBRARY} ${LIBZIP_LIBRARY} ${Pangolin_LIBRARIES} ${OpenCV_LIBS} ${CUDA_LIBRARIES} my_cuda_lib)
But still got the same errors.
-- EDIT: MCVE ---
To demonstrate my error i created a simple example. I have 2 simple files, my main which is a .cpp and my cuda file .cu. My main just calls the function on the other file, looks like this:
#include <iostream>
#include "hello_world.cu"
using namespace std;
int main()
{
teste();
return 0;
}
And my .cu file looks like this:
#include <stdio.h>
#include <iostream>
// CUDA runtime
#include </usr/local/cuda-9.0/include/cuda_runtime.h>
// helper functions and utilities to work with CUDA
#include </usr/local/cuda-9.0/samples/common/inc/helper_functions.h>
#include </usr/local/cuda-9.0/samples/common/inc/helper_cuda.h>
__global__ void kernel (void){
extern __shared__ float shared[];
const int tid = threadIdx.x;
const int bid = blockIdx.x;
}
int teste( void ) {
kernel<<<1,1>>>();
printf( "Hello, World!\n" );
return 0;
}
My CMakeLists.txt that i made to compile this looks like this:
cmake_minimum_required(VERSION 2.8)
set(CUDA_HOST_COMPILER /usr/bin/g++-5)
find_package(CUDA QUIET REQUIRED)
# Pass options to NVCC
set(
CUDA_NVCC_FLAGS
${CUDA_NVCC_FLAGS};
-O3
)
# For compilation ...
# Specify target & source files to compile it from
cuda_add_executable(
helloworld
hello_world.cu
teste.cpp
)
After making cmake and running with "cmake --build ." (i don't know why it has to be this command, normally i just do make -j, but in this example only this works) i get the same errors as in my project, ‘threadIdx’ was not declared in this scope, same for 'blockIdx' etc..
Since you are including hello_world.cu file in your main code, then you want to have it compiled with nvcc compiler. To achieve this change name of teste.cpp file to teste.cu (otherwise g++ will be used).
Also remove 'hello_world.cu' from CMakeLists.txt (it is included already in teste file) to have something like this:
cuda_add_executable(
helloworld
teste.cu
)
Then it should work.
-- EDIT: Additional question --
If you want to keep your .cpp file then you need kind of separation between what g++ can do for you and what nvcc should. So you can introduce to your project additional hello_world.h file:
#ifndef HELLO_WORLD_H
#define HELLO_WORLD_H
int teste();
#endif
include it in your teste.cpp:
#include <iostream>
#include "hello_world.h"
using namespace std;
int main()
{
teste();
return 0;
}
and then your CMakeLists.txt looks like in your original example:
...
cuda_add_executable(
helloworld
teste.cpp
hello_world.cu
)
In such a case hello_world.cu will be compiled with nvcc, and then compilling and linking of teste.cpp will be done by g++ (which will be possible in that case since there is no CUDA code in teste.cpp).
I am using BLAS_LIB in my project to perform matrix multiplication. This is my CMakelist.txt. It was missing the path to BLAS_LIB and BLAS_INCLUDE file. I think it is the reason for the below error. If is the correct reason, please help me to fix it?
This is my CMakeList
cmake_minimum_required(VERSION 2.6)
project(testconv)
if(UNIX)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=gnu++0x")
endif()
add_executable(testconv main.cpp)
install(TARGETS testconv RUNTIME DESTINATION bin)
This is the error
/home/projects/TestConv/main.cpp:24: undefined reference to `cblas_dgemm'
This is my cpp file
#include <iostream>
#include <random>
#include <cblas.h>
int main ( int argc, char* argv[] ) {
//Random numbers
std::mt19937_64 rnd;
std::uniform_real_distribution<double> doubleDist(0, 1);
//Create Arrays that represent the matrices A,B,C
const int n = 20;
double* A = new double[n*n];
double* B = new double[n*n];
double* C = new double[n*n];
//Fill A and B with random numbers
for(uint i =0; i <n; i++){
for(uint j=0; j<n; j++){
A[i*n+j] = doubleDist(rnd);
B[i*n+j] = doubleDist(rnd);
}
}
//Calculate A*B=C
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, 1.0, A, n, B, n, 0.0, C, n);
//Clean up
delete[] A;
delete[] B;
delete[] C;
return 0;
}
Yes, you're completely forgetting to include the BLAS library.
For your CMake version CMake includes a FindBLAS.cmake file, which will not work in your case.
You will need FindCBLAS.cmake that correctly will find the C interface to BLAS as done in other open source projects.
You can grab for example the following https://github.com/Eyescale/CMake/blob/master/FindCBLAS.cmake.
First of all you will need to create a directory called cmake in your project root directory (not in the build directory!!!), then copy that file I just mentioned (FindCBLAS.cmake).
You will need to modify your CMakeLists.txt in the following:
cmake_minimum_required(VERSION 2.6)
project(testconv)
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
find_package(CBLAS REQUIRED)
include_directories(${CBLAS_INCLUDE_DIR})
if(UNIX)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=gnu++0x")
endif()
add_executable(testconv main.cpp)
target_link_libraries(testconv ${CBLAS_LIBRARIES})
install(TARGETS testconv RUNTIME DESTINATION bin)
Try that. It can be improved, but should get you started.
I am using the cmakelists.txt below, that exists in the top folder of my project, and I am trying to connect mpg123 and ao to my project. In one .cpp file of the source folder I added a code with mpg123 and ao to play a song. This code can be compiled with this line:
g++ mpg.cpp -lmpg123 -lao -o mpg
I also added this line:
target_link_libraries(emotime ${LIBAO_LIBRARIES} ${MPG123_LIBRARIES})
in the cmakelists that exists in my source folder. When I am running make I get errors like "undefined reference to ao_initialize'" and "undefined reference tompg123_init'" in this piece of code:
int playaudio(int trackid)
{
char * traklink="";
int tid=trackid;
if (tid==1){
traklink= "/home/mixa/tutti_frutti.wav";
}
else if (tid==2){
traklink= "/home/mixa/karavi.wav";
}
else if (tid==3){
traklink= "/home/mixa/timon.wav";
}
else if (tid==4){
traklink= "/home/mixa/hippo.wav";
}
else{
traklink= "/home/mixa/nanourisma.wav";
}
mpg123_handle *mh;
unsigned char *buffer;
size_t buffer_size;
size_t done;
int err;
int driver;
ao_device *dev;
ao_sample_format format;
int channels, encoding;
long rate;
/* if(argc < 2)
exit(0);
*/
/* initializations */
ao_initialize();
driver = ao_default_driver_id();
mpg123_init();
mh = mpg123_new(NULL, &err);
buffer_size = mpg123_outblock(mh);
buffer = (unsigned char*) malloc(buffer_size * sizeof(unsigned char));
/* open the file and get the decoding format */
//mpg123_open(mh,traklink);
mpg123_open(mh,traklink);
mpg123_getformat(mh, &rate, &channels, &encoding);
/* set the output format and open the output device */
format.bits = mpg123_encsize(encoding) * BITS;
format.rate = rate;
format.channels = channels;
format.byte_format = AO_FMT_NATIVE;
format.matrix = 0;
dev = ao_open_live(driver, &format, NULL);
/* decode and play */
while (mpg123_read(mh, buffer, buffer_size, &done) == MPG123_OK)
//ao_play(dev, buffer, done);
ao_play(dev, (char*)buffer, done);
/* clean up */
free(buffer);
ao_close(dev);
mpg123_close(mh);
mpg123_delete(mh);
mpg123_exit();
ao_shutdown();
//system("mpg123 -q traklink");
// return 0;
}
void *threading (void *trackid)
{
long tid=(long)trackid;
cout<<"sound plays on:Thread id, "<<tid<<endl;
playaudio(tid);
pthread_exit(NULL);
}
cmakelists:
cmake_minimum_required(VERSION 2.8)
project(emotime)
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
FIND_PATH( MPG123_INCLUDE_DIR1
NAMES mpg123.h
PATH_SUFFIXES include
PATHS
~/Library/Frameworks
/Library/Frameworks
/usr/local
/usr
/sw # Fink
/opt/local # DarwinPorts
/opt/csw # Blastwave
/opt
extern/mpg123/ports/MSVC++
extern/mpg123/ports/Xcode
)
IF( MPG123_INCLUDE_DIR1 )
SET( MPG123_INCLUDE_DIRS ${MPG123_INCLUDE_DIRS} ${MPG123_INCLUDE_DIR1} )
ENDIF( MPG123_INCLUDE_DIR1 )
# Include dir (May not be necessary on all platforms)
FIND_PATH( MPG123_INCLUDE_DIR2
NAMES mpg123.h.in
PATHS
~/Library/Frameworks
/Library/Frameworks
/usr/local
/usr
/sw # Fink
/opt/local # DarwinPorts
/opt/csw # Blastwave
/opt
extern/mpg123/src/libmpg123
)
IF( MPG123_INCLUDE_DIR2 )
SET( MPG123_INCLUDE_DIRS ${MPG123_INCLUDE_DIRS} ${MPG123_INCLUDE_DIR2} )
ENDIF( MPG123_INCLUDE_DIR2 )
#MESSAGE( "MPG123_INCLUDE_DIR1: " ${MPG123_INCLUDE_DIR1} )
#MESSAGE( "MPG123_INCLUDE_DIR2: " ${MPG123_INCLUDE_DIR2} )
#MESSAGE( "MPG123_INCLUDE_DIRS: " ${MPG123_INCLUDE_DIRS} )
FIND_LIBRARY( MPG123_LIBRARIES
NAMES mpg123 libmpg123.lib
HINTS
PATH_SUFFIXES lib64 lib libs64 libs libs/Win32 libs/Win64 Release Debug
PATHS
~/Library/Frameworks
/Library/Frameworks
/usr/local
/usr
/sw
/opt/local
/opt/csw
/opt
extern/mpg123/ports/MSVC++/2005
extern/mpg123/ports/MSVC++/2008
extern/mpg123/ports/MSVC++/2008clr
extern/mpg123/ports/MSVC++/2010
)
SET( MPG123_FOUND 0 )
IF( MPG123_LIBRARIES AND MPG123_INCLUDE_DIRS )
SET( MPG123_FOUND 1 )
MESSAGE( STATUS "mpg123 found!" )
ELSE( MPG123_LIBRARIES AND MPG123_INCLUDE_DIRS )
MESSAGE( STATUS "mpg123 not found..." )
ENDIF( MPG123_LIBRARIES AND MPG123_INCLUDE_DIRS )
FIND_PATH(LIBAO_INCLUDE_DIR ao.h /usr/include/ao /usr/local/include/ao)
FIND_LIBRARY(LIBAO_LIBRARIES NAMES ao PATH /usr/lib /usr/local/lib)
IF (LIBAO_INCLUDE_DIR AND LIBAO_LIBRARIES)
SET(LIBAO_FOUND TRUE)
ENDIF (LIBAO_INCLUDE_DIR AND LIBAO_LIBRARIES)
IF (LIBAO_FOUND)
IF (NOT LIBAO_FIND_QUIETLY)
MESSAGE(STATUS "Found libao: ${LIBAO_LIBRARIES}")
ENDIF (NOT LIBAO_FIND_QUIETLY)
ELSE (LIBAO_FOUND)
IF (LIBAO_FIND_REQUIRED)
MESSAGE(FATAL_ERROR "Could not find libao")
ENDIF (LIBAO_FIND_REQUIRED)
ENDIF (LIBAO_FOUND)
include_directories(${LIBAO_INCLUDE_DIR} ${MPG123_INCLUDE_DIRS})
set(ASSETDIR "${emotime_SOURCE_DIR}/assets" )
set(HEADERDIR "${emotime_SOURCE_DIR}/include" )
set(SRCDIR "${emotime_SOURCE_DIR}/src" )
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
add_subdirectory(src
)
In CMake you achieve linking to those libraries using the following commands (don't add the libraries to CXX_FLAGS!):
include_directories(${LIBAO_INCLUDE_DIR} ${MPG123_INCLUDE_DIRS})
target_link_libraries(target_name ${LIBAO_LIBRARIES} ${MPG123_LIBRARIES})
were target_name needs to be replaced by your actual target's name.