In my code, I have a statically allocated array in global memory (i.e., allocated using __device__), which I want to sort using thrust::sort, which isn't working. All of the examples on this topic are using CUDA runtime allocated arrays (using cudaMalloc). Is there any way I can sort a statically allocated array?
I guess it has something to do with statically allocated memory not being accessible from the host. Using cudaMalloc-allocated arrays, it is working fine. However, I want to avoid using this type of allocation since static allocation allows for easier access to the data from device code (doesn't it?).
Minimal (not-) working example:
#include <stdio.h>
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#define N 4
typedef struct element {
int key;
int value;
__host__ __device__ bool operator<(element e) const
{ return key > e.key; }
} element;
__device__ element array[N];
__global__ void init() {
for (int i = 0; i < N; ++i) {
array[N - i - 1].key = i;
}
}
__global__ void print_array() {
for (int i = 0; i < N; ++i) {
printf("%d ", array[i].key);
}
printf("\n");
}
int main(void) {
thrust::device_ptr<element> array_first(array);
init<<<1,1>>>();
printf("unsorted: ");
print_array<<<1, 1>>>();
cudaDeviceSynchronize();
thrust::sort(array_first, array_first + N);
printf("sorted: ");
print_array<<<1, 1>>>();
cudaDeviceSynchronize();
}
Use cudaGetSymbolAddress to take the address of the array variable from a __host__ function:
void* array_ptr = 0;
cudaGetSymbolAddress(&array_ptr, array);
thrust::device_ptr<element> array_first(reinterpret_cast<element*>(array_ptr));
Here's the complete program:
#include <stdio.h>
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#define N 4
typedef struct element {
int key;
int value;
__host__ __device__ bool operator<(element e) const
{ return key > e.key; }
} element;
__device__ element array[N];
__global__ void init() {
for (int i = 0; i < N; ++i) {
array[N - i - 1].key = i;
}
}
__global__ void print_array() {
for (int i = 0; i < N; ++i) {
printf("%d ", array[i].key);
}
printf("\n");
}
int main(void) {
cudaError_t error;
void* array_ptr = 0;
if(error = cudaGetSymbolAddress(&array_ptr, array))
{
throw thrust::system_error(error, thrust::cuda_category());
}
thrust::device_ptr<element> array_first(reinterpret_cast<element*>(array_ptr));
init<<<1,1>>>();
printf("unsorted: ");
print_array<<<1, 1>>>();
if(error = cudaDeviceSynchronize())
{
throw thrust::system_error(error, thrust::cuda_category());
}
thrust::sort(array_first, array_first + N);
if(error = cudaDeviceSynchronize())
{
throw thrust::system_error(error, thrust::cuda_category());
}
printf("sorted: ");
print_array<<<1, 1>>>();
if(error = cudaDeviceSynchronize())
{
throw thrust::system_error(error, thrust::cuda_category());
}
return 0;
}
Here's the output on my system:
$ nvcc test.cu -run
unsorted: 3 2 1 0
sorted: 3 2 1 0
The sorted output is the same as the unsorted output, but I guess that is intentional given the way the data is generated and the definition of element::operator<.
This:
__device__ element array[N];
...
thrust::device_ptr<element> array_first(array);
is illegal. In host code, array is a host address and can't be passed to device code. Do something like this instead:
element* array_d;
cudaGetSymbolAddress((void **)&array_d, array);
thrust::device_ptr<element> array_first(array_d);
i.e. you need to use cudaGetSymbolAddress to read the address from the GPU context at runtime, then you can use the result of that call in GPU code.
Related
I'm attempting to implement an intvector in C++ and am getting a "Segmentation fault: 11" error. I understand this has something to do with memory management, and considering how new I am to C++ it could definitely be a pretty minor mistake. I debugged the code with valgrind and was given messages such as the following:
Use of uninitialized value of size 8, Invalid read of size 4,Conditional jump or move depends on uninitialized value(s).
My best guess is it has something to do with how I'm implementing the arrays. I originally had the arrays stored on the heap but changed it to the stack and still got the same error. I've already implemented an intvector in java, so I was attempting to use similar logic here, which perhaps is part of the issue.
#include <iostream>
#include "IntVector.h"
#include <cmath>
using namespace std;
int num_elements = 0;
int array_size = 0;
int expansion_factor;
void IntVector::expandArray(){
int tempArr[array_size*2];
for(int i =0;i<array_size;i++){
tempArr[i] = array[i];
}
array = tempArr;
array_size = array_size * 2;
}
void IntVector::add(int val){
int tempArr[array_size];
if(array_size == num_elements){
expandArray();
array[num_elements] = val;
}
else{
for(int i = 0;i<array_size;i++){
tempArr[i] = array[i];
}
tempArr[num_elements] = val;
array = tempArr;
}
num_elements++;
}
void IntVector::remove(int index){
}
int IntVector::get(int index) const{
return index;
}
void IntVector::removeLast(){
}
void IntVector::set(int index, int val){
}
std::string IntVector::toString()const {
return "";
}
IntVector::IntVector(int initial_size){
int* array = new int[initial_size];
}
IntVector:: ~IntVector(){
delete[] array;
}
int main(){
IntVector v(0);
v.add(5);
}
#ifndef INTVECTOR_H_
#define INTVECTOR_H_
using std::cout;
class IntVector {
private:
int* array;
int num_elements;
int array_size;
int expansion_factor;
void expandArray();
public:
void add(int val);
void remove(int index);
int get(int index) const;
void removeLast();
void set(int index, int val);
std::string toString() const;
IntVector(int initial_size);
~IntVector();
};
#endif
As mention in the comments, there are definitely some holes in your understanding of C++. Really when dealing with header files you should have a main.cpp, someotherfile.h, someotherfile.cpp. That just best practices to avoid redefinition errors.
There was quite a bit wrong with the way you accessed the private variable. If a class has a private( or even public) variable you don't have to redeclare it each time you want to change its value.
There were one or two major flaws with the way you expanded the vector. If the vector size is initialized to 0 then 0*2 is still 0 so you never actually increased the size. Secondly, when you set the original array = to the new array the new array was just a local array. This means that the memory wasn't actually allocated permanently, once the function ended the temparr was destroyed.
I know this was probably a lot but if you have any question feel free to ask.
main.cpp
#include "IntVector.h"
int main()
{
IntVector v;
IntVector x(10);
v.push(5);
v.push(5);
v.push(5);
v.push(5);
v.push(5);
v.print();
cout << endl;
x.push(5);
x.push(5);
x.push(5);
x.push(5);
x.push(5);
x.print();
return 0;
}
IntVector.h
#include <string>
#include <iostream>
using namespace std;
class IntVector {
private:
int *array;
int num_elements;
int array_size;
//int expansion_factor =; you would only need this if you plan on more than double the vector size
void expandArray(); //normally c++ array double in size each time they expand
public:
//Constructors
IntVector(); //this is a contructor for if nothing is called
IntVector(int initial_size);
//setters
void push(int val); //add
void pop(); //removelast
void remove(int index); //remove
void at(int index, int val); //set
//Getters
int at(int index);
//std::string toString(); I'm changing this to print
void print(); //will print the contents to the terminal
//Deconstructor
~IntVector();
};
IntVector.cpp
#include "IntVector.h"
//constructors
IntVector::IntVector() //no arguments given
{
array = new int[0];
num_elements = 0;
array_size = 0;
}
IntVector::IntVector(int initial_size)
{
array = new int[initial_size];
num_elements = 0;
array_size = initial_size;
}
void IntVector::expandArray()
{
int *tempArr;
if(array_size == 0){
array_size = 1;
tempArr = new int[1];
} else {
//make sure to allocate new memory
//you were creating a local array which was destroy after the function was completed
//using new will allow the array to exist outside the function
tempArr = new int[array_size * 2];
}
for (int i = 0; i < array_size; i++)
{
tempArr[i] = array[i];
}
//make sure to delete the old array otherwise there is a memory leak.
//c++ doesn't have a garbage collector
delete[] array;
array = tempArr;
array_size = array_size * 2;
}
void IntVector::push(int val)
{
num_elements++;
//checking if vector needs to increase
if (array_size <= num_elements)
{
expandArray();
array[num_elements-1] = val;
}
else
{
array[num_elements-1] = val;
}
}
void IntVector::remove(int index)
{
//not sure how to implment this becuase each element has to be a number.
}
int IntVector::at(int index)
{
return array[index];
}
void IntVector::pop()
{
num_elements = num_elements-1; //not really removing it from the "vector" but it won't print out again
}
void IntVector::at(int index, int val)
{
array[index] = val;
}
void IntVector::print()
{
for (int i = 0 ; i < num_elements; i++)
{
cout << array[i] << " ";
}
cout << endl;
}
IntVector::~IntVector()
{
delete[] array;
}
output
5 5 5 5 5
5 5 5 5 5
Hopefully, the comments help. I changed the name of the functions to better match the actual vecter class the already exists in C++. I think it's good to pick apart already defined functions like this because you get a better understanding of how they actually work and not just how to use them.
If you got any questions just leave a comment
My MCVC is compiling but is not functioning as intended. The goal is a genetic algorithm that performs the basics i.e. crossover,mutation,evolution. In the code I have provided should print out the good job statements but thats not the case. I am a new programmer, sorry.
My questions are:
1) The cpu and ram are rev'ed up, is this array declaration and implementation the cause for the uncontrolled spike?
std::array<std::auto_ptr<Individual>,50>myarray;
2) Is my 2.53 GHz Intel Core 2 Duo not up for it?
3) Should I cut down the amount of loops?
Any help is always welcomed !
Individual.h
#include <stdio.h>
#include <cstdlib>
#include <ctime>
#include <vector>
#include <array>
#include <iostream>
class Individual
{
public:
inline int getRandomNumber(int min = 0, int max = 1)
{
srand(static_cast<unsigned int>(time(0)));
static const double fraction = 1.0 / (static_cast<double>(RAND_MAX) + 1.0);
return static_cast<int>(rand() * fraction * (max - min + 1) + min);
}
private:
int defaultGeneLength = 64;
std::vector<char>genes;
int fitness = 0;
public:
Individual()
{
std::cout<<"Good Job";
}
//setters and getters
void generateIndividual();
void setDefaultGeneLength(int length);
char getGene(int index);
void setGene(int index, char value);
//public methods
unsigned int size();
int getFitness();
std::string toString();
};
Individual.cpp
#include "Individual.h"
void Individual::generateIndividual()
{
for (int i = 0; i < size(); i++)
{
genes.push_back(getRandomNumber());
}
}
//setters and getters
void Individual::setDefaultGeneLength(int length)
{
defaultGeneLength = length;
}
char Individual::getGene(int index)
{
return genes.at(index);
}
void Individual::setGene(int index, char value)
{
genes[index] = value;
fitness = 0;
}
//public methods
unsigned int Individual::size()
{
return genes.max_size();
}
int Individual::getFitness()
{
if(fitness == 0)
{
fitness = 1;
} return fitness;
}
std::string Individual::toString()
{
std::string geneString = "";
for (int i = 0; i < size(); i++)
{
geneString.append(getGene(i),1);
}
return geneString;
}
Population.h
#include "Individual.h"
class Population
{
std::array<std::auto_ptr<Individual>,50>myarray;
public:
Population(int populationSize, bool initialise)
{
std::cout<<"Good Job2";
if(initialise)
{
for (int i = 0; i < populationSize; ++i)
{
std::auto_ptr<Individual>newIndividual(new Individual());
myarray.at(i) = newIndividual;
myarray.at(i)->generateIndividual();
saveIndividual(i,*(myarray.at(i)));
}
}
std::cout<<"Good Job 3";
}
Individual getIndividual(int index);
Individual getFittest();
unsigned long size();
void saveIndividual (int index, Individual indiv);
~Population()
{
}
};
Population.cpp
#include "Population.h"
Individual Population::getIndividual(int index)
{
return *myarray.at(index);
}
Individual Population::getFittest()
{
Individual fittest = *myarray.at(0);
for (int i = 0; i < myarray.max_size(); i++)
{
if (fittest.getFitness() <= getIndividual(i).getFitness())
{
fittest = getIndividual(i);
}
}
return fittest;
}
unsigned long Population::size()
{
return myarray.max_size();
}
void Population::saveIndividual (int index, Individual indiv)
{
*myarray.at(index) = indiv;
}
Main.cpp
int main(int argc, const char * argv[]) {
Population *mypop = new Population(2,true);
delete mypop;
mypop = nullptr;
return 0;
}
unsigned int Individual::size()
{
return genes.max_size();
}
Your genes is a:
std::vector<char> genes;
The C++ standard defines std::vector::max_size() as follows:
distance(begin(), end()) for the largest possible container
It is not specified what "largest possible container" means. "Possible" could mean anything, like, if the system had a ten terabyte hard drive, so that the operating system could use the entire hard drive to page its virtual memory address space. That's certainly "possible", in some sense of the word. But it goes without saying that paging out ten terabytes will take a while.
With 64 bit gcc, the following simple program:
#include <iostream>
#include <vector>
int main()
{
std::vector<char> c;
std::cout << c.max_size() << std::endl;
return 0;
}
Produces the following output:
18446744073709551615
However, my chances of being able to actually create a vector of such size are not very good.
But let's get back to your code:
void Individual::generateIndividual()
{
for (int i = 0; i < size(); i++)
{
genes.push_back(getRandomNumber());
}
}
Ok. You must be feeling quite lucky. You believe that it will be possible for you to create a vector<char> that's max_size() big.
Are you quite sure about that?
I'm somewhat skeptical.
I have written the following code in C++, however found out that I have to convert it in C. I am not C or even C++ programmer, please help.
Can someone help me change this method to C directives, specifically vector implementation, following will not compile I have removed complexity to keep it simple. Thanks in anticipation.
__declspec(dllexport) std::vector<MY_STRUCT> WINAPI ABC(char *strVal)
{
MY_STRUCT f;
std::vector<MY_STRUCT> list = std::vector<MY_STRUCT>();
while (*dddd)
{ /*do the following for every feature in license file*/
f.attrib_num = fi.attrib_num;
f.attrib_lic = fi.attrib_lic;
list.push_back(f);
} /* end while(conf) */
dddd++;
printf("\n");
} /* end while (*dddd) */
return flist;
}
Here is implementation (also usage) of dynamic array of structs in C. You can adapt it to your structure; I used to post it on code review before
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct
{
int ID;
char * name;
} Student;
// array of structs
typedef struct
{
Student *array;
size_t used;
size_t size;
} Array;
void initArray(Array *a, size_t initialSize)
{
int i = 0;
// Allocate initial space
a->array = malloc(initialSize * sizeof(Student));
a->used = 0; // no elements used
a->size = initialSize; // available nr of elements
// Initialize all values of the array to 0
for(i = 0; i<initialSize; i++)
{
memset(&a->array[i],0,sizeof(Student));
}
}
// Add element to array
void addElement(Array *a, Student element)
{
int i = 0;
if (a->used == a->size)
{
a->size *= 2;
a->array = realloc(a->array, a->size * sizeof(Student));
// Initialize the last/new elements of the reallocated array
for(i = a->used; i<a->size; i++)
{
memset(&a->array[i],0,sizeof(Student));
}
}
// Copy name
a->array[a->used].name = (char*)malloc(strlen(element.name) + 1);
strcpy(a->array[a->used].name, element.name);
// Copy ID
a->array[a->used].ID=element.ID;
a->used++;
}
void freeArray(Array *a)
{
int i = 0;
// Free all name variables of each array element first
for(i=0; i<a->used; i++)
{
free(a->array[i].name);
a->array[i].name=NULL;
}
// Now free the array
free(a->array);
a->array = NULL;
a->used = 0;
a->size = 0;
}
int main(int argc, const char * argv[])
{
Array a;
Student x,y,z;
x.ID = 20;
x.name=malloc(strlen("stud1") + 1);
strcpy(x.name,"stud1");
y.ID = 30;
y.name=malloc(strlen("student2") + 1);
strcpy(y.name,"student2");
z.ID = 40;
z.name=malloc(strlen("student3") + 1);
strcpy(z.name,"student3");
// Init array, don't forget
initArray(&a, 5);
// Add elements
addElement(&a, x);
addElement(&a, y);
addElement(&a, z);
// Print elements
printf("%d\n", a.array[0].ID);
printf("%s\n", a.array[0].name);
printf("%d\n", a.array[1].ID);
printf("%s\n", a.array[1].name);
printf("%d\n", a.array[2].ID);
printf("%s\n", a.array[2].name);
// Free array
// don't forget
freeArray(&a);
free(x.name);
free(y.name);
free(z.name);
return 0;
}
I'm using an example code given to me by another C++ coder for a project. I'm a new student of C++ language and I wondered is there a possible memory leak / bugs in this class file given to me (PlacementHead.cpp):
#include "PlacementHead.h"
#include <string>
#include <iostream>
#include <string.h>
PlacementHead::PlacementHead(int width, int height, int gap, char* s) {
width_ = width;
height_ = height;
gap_ = gap;
size_ = (width*height)+1;
set_ = new char[size_ + 1];
from_ = new int[size_ + 1];
original_ = new char[size_ + 1];
strcpy(set_,s);
strcpy(original_,s);
}
PlacementHead::~PlacementHead() {
}
int PlacementHead::getSize() { return size_; }
int PlacementHead::getHeight() { return height_; }
int PlacementHead::getWidth() { return width_; }
int PlacementHead::getGap() { return gap_; }
// Palauttaa indeksissä i olevan suuttimen
char PlacementHead::getNozzle(int i) {
return set_[i-1];
}
// Asettaa indeksissä i olevan suuttimen
void PlacementHead::setNozzle(int i, char c) {
set_[i-1] = c;
}
// Merkitsee suuttimen poimituksi poistamalla sen listasta
void PlacementHead::markNozzle(int i, int bankPos) {
set_[i-1] = ' ';
from_[i-1] = bankPos;
}
// Palauttaa seuraavan poimimattoman suuttimen indeksin
int PlacementHead::getNextUnmarkedPos() {
for (int i=0; i<size_; i++) {
if (set_[i]!=' ') {
return i+1;
}
}
return 0;
}
// Palauttaa suuttimen alkuperäisen sijainnin pankissa
int PlacementHead::getBankPos(int i) {
return from_[i-1];
}
// Plauttaa alkuperäisen ladontapaan suutinjärjestyksen
void PlacementHead::reset() {
//for (int i=0; i<size_; i++) {
// set_[i] = original_[i];
//}
strcpy(set_,original_);
}
// Tulostusmetodi
void PlacementHead::print() {
std::cout << "ladontapaa:\n";
for (int h=height_; h>0; h--) {
for (int w=width_; w>0; w--) {
int i = ((h-1)*width_)+w;
std::cout << getNozzle(i);
}
std::cout << "\n";
}
}
PlacementHead.h:
#ifndef PLACEMENTHEAD_H
#define PLACEMENTHEAD_H
class PlacementHead {
public:
PlacementHead(int size, int rows, int gap, char* s);
~PlacementHead();
int getSize();
int getHeight();
int getWidth();
int getGap();
char getNozzle(int i);
void setNozzle(int i, char c);
void markNozzle(int i, int bankPos);
int getNextUnmarkedPos();
int getBankPos(int i);
void reset();
void print();
private:
char* set_;
int* from_;
char* original_;
int size_;
int width_;
int height_;
int gap_;
};
#endif
I notice that there is dynamic allocation of memory, but I don't see a delete anywhere...is this a problem? How could I fix this if it is a problem?
Thnx for any help!
P.S.
I noticed there is no keyword class used in this example?...Can you define a class like this?
It's impossible to say without seeing the class definition (the
header); if size_, etc. are something like
boost::shared_array, or std::unique_ptr, there is no leak.
If they are simply int*, there is a leak.
Of course, no C++ programmer would write this sort of code
anyway. The class would contain std::vector<int> and
std::string. Judging from what we see here, the author
doesn't know C++.
the code has leak . the constructor allocates the memory .Destructor or some other function have to clean that before the object gets destroyed
Another problem is that your code does not obey Rule of three (links here and here)
once you will write code like:
{
PlacementHead a(0,0,0,"asdsa");
PlacementHead b(0,0,0,"asdsa");
a = b; // line 1
} // here segfault
you will get segfault, in line 1, pointers will be copied from b to a, and once you will finally have destructors, pointers will be deleted twice, which is wrong. This is called shallow copy, you need deep copy, where new array will be allocated.
I am try to write a container memcpy function.
However, I can't pass my container as parameter.
Could anyone give me a hint?
At this moment, I create 1 more function inside class called DIn, to put information.
I try to achieve remove DIn function, and use like normal memory copy to do this.
Could somebody give me a help?
Best regards
TINGMING
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <assert.h>
template <typename VECTOR_TYPE = char,int VECTOR_SIZE = 256>
class vector{
private:
int SIZE;
VECTOR_TYPE ARRAY[VECTOR_SIZE];
public:
vector():SIZE(VECTOR_SIZE){
}
VECTOR_TYPE& operator[](int nIndex){
assert(nIndex >= 0 && nIndex < SIZE);
return ARRAY[nIndex];
}
void DIn(VECTOR_TYPE data,int nIndex){
assert(nIndex >= 0 && nIndex < SIZE);
ARRAY[nIndex] = data;
}
int sizeof_vector(){
return SIZE;
}
};
template <typename VECTOR_TYPE,int VECTOR_SIZE1,int VECTOR_SIZE2>
void vector_memcpy(
vector <VECTOR_TYPE,VECTOR_SIZE1> *target,int tIdx,
vector <VECTOR_TYPE,VECTOR_SIZE2> source,int sIdx,
int Length)
{
int i;
for(i=0;i<Length;i++)
target->DIn(source[sIdx+i],tIdx+i);
}
int main()
{
vector <int,16> AV;
vector <int,32> BV;
int i;
for(i=0;i<16;i++)
AV[i] = i*15;
vector_memcpy(&BV,0,AV,0,16);
vector_memcpy(&BV,16,BV,0,16);
for(i=0;i<16;i++)
printf("%2d: %3d %3d %3d\n",i,AV[i],BV[i],BV[i+16]);
return 0;
}