my following minimalist Cuda code returns an incorrect result (all polygons have 0 vertices at the end) while the same code running in serial in C++ is working well. The problem is embarrassingly parallel : no communication, no syncthreads etc., and the Cuda memory allocations are sucessful. Even my dummy variable that stores the content of the input array for debug purpose is 0 for the Cuda version. There is no access out of bounds since my arrays are largely large enough. Replacing the memcpy by a loop in Cuda doesn't change anything.
I really don't understand what happens... any idea ? Thanks!
Cuda code:
#include <stdio.h>
#include <iostream>
#include <stdlib.h>
#include <cuda.h>
class Point2D {
public:
__device__ Point2D(double xx=0, double yy=0):x(xx),y(yy){};
double x, y;
};
__device__ double dot(const Point2D &A, const Point2D &B) {
return A.x*B.x + A.y*B.y;
}
__device__ Point2D operator*(double a, const Point2D &P) {
return Point2D(a*P.x, a*P.y);
}
__device__ Point2D operator+(Point2D A, const Point2D &B) {
return Point2D(A.x + B.x, A.y + B.y);
}
__device__ Point2D operator-(Point2D A, const Point2D &B) {
return Point2D(A.x - B.x, A.y - B.y);
}
__device__ Point2D inter(const Point2D &A, const Point2D &B, const Point2D &C, const Point2D &D) { //intersects AB by *the mediator* of CD
Point2D M = 0.5*(C+D);
return A - (dot(A-M, D-C)/dot(B-A, D-C)) * (B-A);
}
class Polygon {
public:
__device__ Polygon():nbpts(0){};
__device__ void addPts(Point2D pt) {
pts[nbpts] = pt;
nbpts++;
};
__device__ Polygon& operator=(const Polygon& rhs) {
nbpts = rhs.nbpts;
dummy = rhs.dummy;
memcpy(pts, rhs.pts, nbpts*sizeof(Point2D));
return *this;
}
__device__ void cut(const Point2D &inside_pt, const Point2D &outside_pt) {
int new_nbpts = 0;
Point2D newpts[128];
Point2D AB(outside_pt-inside_pt);
Point2D M(0.5*(outside_pt+inside_pt));
double ABM = dot(AB, M);
Point2D S = pts[nbpts-1];
for (int i=0; i<nbpts; i++) {
Point2D E = pts[i];
double ddot = -ABM + dot(AB, E);
if (ddot<0) { // E inside clip edge
double ddot2 = -ABM + dot(AB, S);
if (ddot2>0) {
newpts[new_nbpts] = inter(S,E, inside_pt, outside_pt);
new_nbpts++;
}
newpts[new_nbpts] = E;
new_nbpts++;
} else {
double ddot2 = -ABM + dot(AB, S);
if (ddot2<0) {
newpts[new_nbpts] = inter(S,E, inside_pt, outside_pt);
new_nbpts++;
}
}
S = E;
}
memcpy(pts, newpts, min(128, new_nbpts)*sizeof(Point2D));
nbpts = new_nbpts;
}
//private:
Point2D pts[128];
int nbpts;
float dummy;
};
__global__ void cut_poly(float *a, Polygon* polygons, int N)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx>=N/2) return;
Polygon pol;
pol.addPts(Point2D(0.,0.));
pol.addPts(Point2D(1.,0.));
pol.addPts(Point2D(1.,1.));
pol.addPts(Point2D(0.,1.));
Point2D curPt(a[2*idx], a[2*idx+1]);
for (int i=0; i<N/2; i++) {
Point2D other_pt(a[2*i], a[2*i+1]);
pol.cut(curPt, other_pt);
}
pol.dummy = a[idx];
polygons[idx] = pol;
}
int main(int argc, unsigned char* argv[])
{
const int N = 100;
float a_h[N], *a_d;
Polygon p_h[N/2], *p_d;
size_t size = N * sizeof(float);
size_t size_pol = N/2 * sizeof(Polygon);
cudaError_t err = cudaMalloc((void **) &a_d, size);
cudaError_t err2 = cudaMalloc((void **) &p_d, size_pol);
for (int i=0; i<N; i++) a_h[i] = (float)(rand()%1000)*0.001;
cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice);
int block_size = 4;
int n_blocks = N/block_size + (N%block_size == 0 ? 0:1);
cut_poly <<< n_blocks, block_size >>> (a_d, p_d, N);
cudaMemcpy(a_h, a_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
cudaMemcpy(p_h, p_d, sizeof(Polygon)*N/2, cudaMemcpyDeviceToHost);
for (int i=0; i<N/2; i++)
printf("%f \t %f \t %u\n", a_h[i], p_h[i].dummy, p_h[i].nbpts);
cudaFree(a_d);
cudaFree(p_d);
return 0;
}
Same code in C++ that works properly:
#include <stdio.h>
#include <iostream>
#include <stdlib.h>
class Point2D {
public:
Point2D(double xx=0, double yy=0):x(xx),y(yy){};
double x, y;
};
double dot(const Point2D &A, const Point2D &B) {
return A.x*B.x + A.y*B.y;
}
Point2D operator*(double a, const Point2D &P) {
return Point2D(a*P.x, a*P.y);
}
Point2D operator+(Point2D A, const Point2D &B) {
return Point2D(A.x + B.x, A.y + B.y);
}
Point2D operator-(Point2D A, const Point2D &B) {
return Point2D(A.x - B.x, A.y - B.y);
}
Point2D inter(const Point2D &A, const Point2D &B, const Point2D &C, const Point2D &D) { //intersects AB by *the mediator* of CD
Point2D M = 0.5*(C+D);
return A - (dot(A-M, D-C)/dot(B-A, D-C)) * (B-A);
}
class Polygon {
public:
Polygon():nbpts(0){};
void addPts(Point2D pt) {
pts[nbpts] = pt;
nbpts++;
};
Polygon& operator=(const Polygon& rhs) {
nbpts = rhs.nbpts;
dummy = rhs.dummy;
memcpy(pts, rhs.pts, nbpts*sizeof(Point2D));
return *this;
}
void cut(const Point2D &inside_pt, const Point2D &outside_pt) {
int new_nbpts = 0;
Point2D newpts[128];
Point2D AB(outside_pt-inside_pt);
Point2D M(0.5*(outside_pt+inside_pt));
double ABM = dot(AB, M);
Point2D S = pts[nbpts-1];
for (int i=0; i<nbpts; i++) {
Point2D E = pts[i];
double ddot = -ABM + dot(AB, E);
if (ddot<0) { // E inside clip edge
double ddot2 = -ABM + dot(AB, S);
if (ddot2>0) {
newpts[new_nbpts] = inter(S,E, inside_pt, outside_pt);
new_nbpts++;
}
newpts[new_nbpts] = E;
new_nbpts++;
} else {
double ddot2 = -ABM + dot(AB, S);
if (ddot2<0) {
newpts[new_nbpts] = inter(S,E, inside_pt, outside_pt);
new_nbpts++;
}
}
S = E;
}
memcpy(pts, newpts, std::min(128, new_nbpts)*sizeof(Point2D));
/*for (int i=0; i<128; i++) {
pts[i] = newpts[i];
}*/
nbpts = new_nbpts;
}
//private:
Point2D pts[128];
int nbpts;
float dummy;
};
void cut_poly(int idx, float *a, Polygon* polygons, int N)
{
if (idx>=N/2) return;
Polygon pol;
pol.addPts(Point2D(0.,0.));
pol.addPts(Point2D(1.,0.));
pol.addPts(Point2D(1.,1.));
pol.addPts(Point2D(0.,1.));
Point2D curPt(a[2*idx], a[2*idx+1]);
for (int i=0; i<N/2; i++) {
if (idx==i) continue;
Point2D other_pt(a[2*i], a[2*i+1]);
pol.cut(curPt, other_pt);
}
pol.dummy = a[idx];
polygons[idx] = pol;
}
int main(int argc, unsigned char* argv[])
{
const int N = 100; // Number of elements in arrays
float a_h[N], *a_d; // Pointer to host & device arrays
Polygon p_h[N/2], *p_d;
for (int i=0; i<N; i++) a_h[i] = (float)(rand()%1000)*0.001;
for (int idx=0; idx<N; idx++)
cut_poly(idx, a_h, p_h, N);
for (int i=0; i<N/2; i++)
printf("%f \t %f \t %u\n", a_h[i], p_h[i].dummy, p_h[i].nbpts);
return 0;
}
Well I guess you can disregard most of my comments. I was by mistake working on a machine I had set up with CUDA 3.2 and it was behaving differently along the lines of the kernel launch failure. When I switched to CUDA 4.1 and CUDA 5.0 things started to make sense. Apologies for my confusion there.
Anyway after getting past that, I pretty quickly noticed that there is a difference between your CPU and GPU implementations. Specifically here (looking at the CPU code):
void cut_poly(int idx, float *a, Polygon* polygons, int N)
{
if (idx>=N/2) return;
Polygon pol;
pol.addPts(Point2D(0.,0.));
pol.addPts(Point2D(1.,0.));
pol.addPts(Point2D(1.,1.));
pol.addPts(Point2D(0.,1.));
Point2D curPt(a[2*idx], a[2*idx+1]);
for (int i=0; i<N/2; i++) {
if (idx==i) continue; /* NOTE THIS LINE MISSING FROM YOUR GPU CODE */
Point2D other_pt(a[2*i], a[2*i+1]);
pol.cut(curPt, other_pt);
}
pol.dummy = a[idx];
polygons[idx] = pol;
}
Referring to the line I have added the comment to above, if you add that exact line of code to the corresponding spot in your GPU code in the cut_poly kernel, then for me anyway the GPU code produces the same printed result as the CPU code.
One other observation I would make is that you are needlessly running blocks with just 4 threads. Nothing wrong with that while you're working out the kinks in the code, but once you have it running for "production" purposes, you will most likely want to target a higher number like 256, and be sure to choose a number that is an integer multiple of 32, for best performance.
In response to a question posted in the comments, I believe that the data is being copied properly, but most likely you are not accessing it correctly on the host. (I don't know how you're determining that "my array is not properly returned to the host"). Most of your class definitions were __device__ only. As a result, it's difficult to access structures within classes on the host (e.g. the Point2D pts class within the Polygon class). I'm inserting modified code here which I think demonstrates that the data is being transferred back to the host:
#include <stdio.h>
#include <iostream>
#include <stdlib.h>
// #include <cuda.h>
#define cudaCheckErrors(msg) \
do { \
cudaError_t __err = cudaGetLastError(); \
if (__err != cudaSuccess) { \
fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
msg, cudaGetErrorString(__err), \
__FILE__, __LINE__); \
fprintf(stderr, "*** FAILED - ABORTING\n"); \
exit(1); \
} \
} while (0)
class Point2D {
public:
__host__ __device__ Point2D(double xx=0, double yy=0):x(xx),y(yy){};
double x, y;
};
__host__ __device__ double dot(const Point2D &A, const Point2D &B) {
return A.x*B.x + A.y*B.y;
}
__host__ __device__ Point2D operator*(double a, const Point2D &P) {
return Point2D(a*P.x, a*P.y);
}
__host__ __device__ Point2D operator+(Point2D A, const Point2D &B) {
return Point2D(A.x + B.x, A.y + B.y);
}
__host__ __device__ Point2D operator-(Point2D A, const Point2D &B) {
return Point2D(A.x - B.x, A.y - B.y);
}
__host__ __device__ Point2D inter(const Point2D &A, const Point2D &B, const Point2D &C, const Point2D &D) { //intersects AB by *the mediator* of CD
Point2D M = 0.5*(C+D);
return A - (dot(A-M, D-C)/dot(B-A, D-C)) * (B-A);
}
class Polygon {
public:
__host__ __device__ Polygon():nbpts(0){};
__host__ __device__ void addPts(Point2D pt) {
pts[nbpts] = pt;
nbpts++;
};
__host__ __device__ Polygon& operator=(const Polygon& rhs) {
nbpts = rhs.nbpts;
dummy = rhs.dummy;
memcpy(pts, rhs.pts, nbpts*sizeof(Point2D));
return *this;
}
__host__ __device__ Point2D getpoint(unsigned i){
if (i<128) return pts[i];
else return pts[0];
}
__host__ __device__ void cut(const Point2D &inside_pt, const Point2D &outside_pt) {
int new_nbpts = 0;
Point2D newpts[128];
Point2D AB(outside_pt-inside_pt);
Point2D M(0.5*(outside_pt+inside_pt));
double ABM = dot(AB, M);
Point2D S = pts[nbpts-1];
for (int i=0; i<nbpts; i++) {
Point2D E = pts[i];
double ddot = -ABM + dot(AB, E);
if (ddot<0) { // E inside clip edge
double ddot2 = -ABM + dot(AB, S);
if (ddot2>0) {
newpts[new_nbpts] = inter(S,E, inside_pt, outside_pt);
new_nbpts++;
}
newpts[new_nbpts] = E;
new_nbpts++;
} else {
double ddot2 = -ABM + dot(AB, S);
if (ddot2<0) {
newpts[new_nbpts] = inter(S,E, inside_pt, outside_pt);
new_nbpts++;
}
}
S = E;
}
memcpy(pts, newpts, min(128, new_nbpts)*sizeof(Point2D));
nbpts = new_nbpts;
}
//private:
Point2D pts[128];
int nbpts;
float dummy;
};
__global__ void cut_poly(float *a, Polygon* polygons, int N)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx>=N/2) return;
Polygon pol;
pol.addPts(Point2D(0.,0.));
pol.addPts(Point2D(1.,0.));
pol.addPts(Point2D(1.,1.));
pol.addPts(Point2D(0.,1.));
Point2D curPt(a[2*idx], a[2*idx+1]);
for (int i=0; i<N/2; i++) {
if (idx==i) continue;
Point2D other_pt(a[2*i], a[2*i+1]);
pol.cut(curPt, other_pt);
}
pol.dummy = pol.getpoint(0).x;
polygons[idx] = pol;
}
int main(int argc, unsigned char* argv[])
{
const int N = 100;
float a_h[N], *a_d;
Polygon p_h[N/2], *p_d;
size_t size = N * sizeof(float);
size_t size_pol = N/2 * sizeof(Polygon);
cudaMalloc((void **) &a_d, size);
cudaCheckErrors("cm1");
cudaMalloc((void **) &p_d, size_pol);
cudaCheckErrors("cm2");
for (int i=0; i<N; i++) a_h[i] = (float)(rand()%1000)*0.001;
cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice);
cudaCheckErrors("cmcp1");
int block_size = 128;
int n_blocks = N/block_size + (N%block_size == 0 ? 0:1);
cut_poly <<< n_blocks, block_size >>> (a_d, p_d, N);
cudaCheckErrors("kernel");
cudaMemcpy(a_h, a_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
cudaCheckErrors("cmcp2");
cudaMemcpy(p_h, p_d, sizeof(Polygon)*N/2, cudaMemcpyDeviceToHost);
cudaCheckErrors("cmcp3");
for (int i=0; i<N/2; i++)
printf("%f \t %f \t %f \t %u\n", a_h[i], p_h[i].dummy, p_h[i].getpoint(0).x, p_h[i].nbpts);
cudaFree(a_d);
cudaFree(p_d);
return 0;
}
I would suggest using posting new questions for these things.
Related
I have done the algorithm the expected output is :
p00 - p01,
p01 - p03 ,
p03 - p10,
p10 - p12 ,
p12 - p00
But I get this instead:
Convex hull:
p00 - p01
p01 - p03
p03 - p05
p05 - p10
p10 - p00
Points:
p00: (-5,-6)
p01: (6,-4)
p02: (5.5,-3)
p03: (8,0)
p04: (5,0)
p05: (4,2)
p06: (1,3)
p07: (0,2)
p08: (-1,1)
p09: (-1.5,2)
p10: (-1.5,6)
p11: (-5.5,1.5)
p12: (-8,-1)
I have been trying so long to get it right but some how I can't. Can anyone help? I am using C++
Below is my code:
I have 3 classes Vector2D, Point2D and Point2DSet my Graham Scan Implementation is in the buildConvexHull function in the Point2DSet.
Vector2D.cpp
#include "Vector2D.h"
Vector2D::Vector2D(double aX, double aY): fX(aX), fY(aY){ }
void Vector2D::setX(double aX){ fX = aX;}
double Vector2D::getX() const { return fX; }
void Vector2D::setY(double aY) { fY = aY;}
double Vector2D::getY() const { return fY; }
Vector2D Vector2D::operator+(const Vector2D& aRHS) const
{
return (fX + aRHS.fX, fY + aRHS.fY);
}
Vector2D Vector2D::operator-(const Vector2D& aRHS) const
{
return (fX - aRHS.fX, fY - aRHS.fY);
}
double Vector2D::magnitude() const
{
return sqrt((fX * fX) + (fY * fY));
}
double Vector2D::direction() const
{
return atan(fY/fX);
}
double Vector2D::dot(const Vector2D& aRHS) const
{
return (this->getX() * aRHS.getX()) + (this->getY() * aRHS.getY());
}
double Vector2D::cross(const Vector2D& aRHS) const
{
return (this->getX() * aRHS.getY()) - (aRHS.getX() * this->getY());
}
double Vector2D::angleBetween(const Vector2D& aRHS) const
{
double dntmr = magnitude() * aRHS.magnitude();
if (dntmr > 0.0)
{
return acos(this->dot(aRHS) / (this->magnitude() * aRHS.magnitude()));
}
return acos(1.0/1.0);
}
std::ostream& operator<<(std::ostream& aOutStream, const Vector2D& aObject)
{
aOutStream << " ( " << aObject.fX << ", " << aObject.fY << " )\n";
return aOutStream;
}
std::istream& operator>>(std::istream& aInStream, Vector2D& aObject)
{
aInStream >> aObject.fX;
aInStream >> aObject.fY;
return aInStream;
}
Point2D
#include "Point2D.h"
static const Point2D gCoordinateOrigin;
// Private function gets direction in reference to aOther
double Point2D::directionTo(const Point2D& aOther) const
{
return (aOther.fPosition - fPosition).direction();
}
// Private Function to get magnitude in reference to aOther
double Point2D::magnitudeTo(const Point2D& aOther) const
{
return (aOther.fPosition - fPosition).magnitude();
}
Point2D::Point2D() : fId(" "), fPosition(0,0), fOrigin(&gCoordinateOrigin) { }
Point2D::Point2D(const std::string& aId, double aX, double aY) : fId(aId), fPosition(aX,aY), fOrigin(&gCoordinateOrigin) { }
Point2D::Point2D(std::istream &aIStream) : fOrigin(&gCoordinateOrigin)
{
aIStream >> fId >> fPosition;
}
const std::string& Point2D::getId() const { return fId; }
void Point2D::setX(const double& aX) { fPosition.setX(aX); }
void Point2D::setY(const double& aY) { fPosition.setY(aY); }
const double Point2D::getX() const { return fPosition.getX(); }
const double Point2D::getY() const { return fPosition.getY(); }
void Point2D::setOrigin(const Point2D& aPoint) { fOrigin = &aPoint;}
Vector2D Point2D::operator-(const Point2D& aRHS) const
{
return (fPosition - aRHS.fPosition);
}
// Return Direction with reference to origin
double Point2D::direction() const
{
return fOrigin->directionTo(*this);
}
// Return Direction with reference to origin
double Point2D::magnitude() const
{
return fOrigin->magnitudeTo(*this);;
}
bool Point2D::isCollinear(const Point2D& aOther) const
{
if (fPosition.cross(aOther.fPosition) == 0)
{
return true;
}
return false;
}
// Check to see if the point is Clockwise or not
bool Point2D::isClockwise(const Point2D& aP0, const Point2D& aP2) const
{
double val = (fPosition.getY() - aP0.fPosition.getY()) * (aP2.fPosition.getX() - fPosition.getX()) -
(fPosition.getX() - aP0.fPosition.getX()) * (aP2.fPosition.getY() - fPosition.getY());
double val2 = fPosition.angleBetween(aP2.fPosition) - fPosition.angleBetween(aP0.fPosition);
if (val < 0 )
{
return false;
}
return true;
}
bool Point2D::operator<(const Point2D& aRHS) const
{
if (fPosition.getY() < aRHS.getY())
{
return true;
}
return false;
}
const Point2D& Point2D::getOrigin() const { return *fOrigin;}
std::ostream& operator<<(std::ostream& aOStream, const Point2D& aObject)
{
aOStream << aObject.fId << " : " << aObject.fPosition;
return aOStream;
}
std::istream& operator>>(std::istream& aIStream, Point2D& aObject)
{
aIStream >> aObject.fId >> aObject.fPosition;
return aIStream;
}
Point2DSet
#include "Point2DSet.h"
#include <fstream>
#include <stdexcept>
#include <algorithm>
void Point2DSet::add(const Point2D& aPoint)
{
fPoints.push_back(aPoint);
}
void Point2DSet::add(Point2D&& aPoint)
{
fPoints.push_back(aPoint);
}
void Point2DSet::removeLast()
{
fPoints.pop_back();
}
bool Point2DSet::doesNotTurnLeft(const Point2D& aPoint) const
{
return fPoints[size()-1].isClockwise(fPoints[size()-2],aPoint);
}
// Comparator function for Stable_sort
bool orderByCoordinates(const Point2D& aLeft, const Point2D& aRight)
{
return aLeft < aRight;
}
//Comparator function for Stable_sort
bool orderByPolarAngle(const Point2D& aLHS, const Point2D& aRHS)
{
if (aLHS.isCollinear(aRHS))
{
return aLHS.magnitude() > aRHS.magnitude();
}
return aLHS.direction() < aRHS.direction();
}
void Point2DSet::populate(const std::string& aFileName)
{
std::ifstream INPUT(aFileName);
//std::ifstream INPUT("Pointers.txt");
std::string id;
double x;
double y;
while (INPUT >> id >> x >> y)
{
Point2D z(id, x, y);
add(z);
}
INPUT.close();
}
void Point2DSet::buildConvexHull(Point2DSet& aConvexHull)
{
aConvexHull.clear();
sort(orderByCoordinates);
sort(orderByPolarAngle);
aConvexHull.add(fPoints[0]); // Origin (Smallest y-coordinate)
aConvexHull.add(fPoints[1]); //
//aConvexHull.add(fPoints[2]);
if (fPoints[2].isCollinear(fPoints[1])) {
aConvexHull.add(fPoints[2]);
}
//*/
for(size_t i = 3; i < size(); i++)
{
if (fPoints[i - 1].isCollinear(fPoints[i]))
{
continue; //i++;
}
if(aConvexHull.doesNotTurnLeft(fPoints[i]))
{
aConvexHull.removeLast();
}
aConvexHull.add(fPoints[i]);
}//*/
}
size_t Point2DSet::size() const
{
return fPoints.size();
}
void Point2DSet::clear()
{
fPoints.clear();
}
void Point2DSet::sort(Comparator aComparator)
{
stable_sort(fPoints.begin(), fPoints.end(), aComparator);
}
const Point2D& Point2DSet::operator[](size_t aIndex) const
{
return fPoints[aIndex];
}
Point2DSet::Iterator Point2DSet::begin() const
{
return fPoints.begin();
}
Point2DSet::Iterator Point2DSet::end() const
{
return fPoints.end();
}
Any other improvements are warmly welcome. Thank You!
There was a few issues in your code.
Let's start with Vector2D::direction(), you should use atan2, here the explanation why. After that we will be able to correctly sort the points.
Now the main algorithm. After a few changes it looks:
aConvexHull.clear();
// Get points with bigger magnitude first.
sort(orderByMagnitudeDescending);
sort(orderByPolarAngle);
// We want to have the lowest point as the first element.
rotatePointsByLowest();
aConvexHull.add(fPoints[0]); // Origin (Smallest y-coordinate)
aConvexHull.add(fPoints[1]);
for(size_t i = 2; i < size(); i++)
{
if (fPoints[i - 1].isCollinear(fPoints[i]))
{
continue; //i++;
}
// There should be a loop instead of an if statement.
while (aConvexHull.fPoints.size() > 2 && aConvexHull.doesNotTurnLeft(fPoints[i]))
{
aConvexHull.removeLast();
}
aConvexHull.add(fPoints[i]);
}//*/
The algorithm requires to find the lowest point and then traverse the rest of points according to their angle. I added a helper function Point2DSet::rotatePointsByLowest:
void Point2DSet::rotatePointsByLowest() {
auto lowestPoint = fPoints.begin();
for (auto iterator = fPoints.begin() + 1;iterator != fPoints.end(); iterator++) {
if (iterator->fPosition.fY < lowestPoint->fPosition.fY) {
lowestPoint = iterator;
} else if ((iterator->fPosition.fY == lowestPoint->fPosition.fY) && (iterator->fPosition.fX < lowestPoint->fPosition.fX)) {
lowestPoint = iterator;
}
}
std::rotate(fPoints.begin(), lowestPoint, fPoints.end());
}
There are more improvements that should be applied but I wanted to keep the changes minimal to show the issues causing the incorrect result.
Link for testing your project: https://onlinegdb.com/_ZXmQF2vJ
I am working on a class project and im getting these errors, if anyone could possibly help that would be great, im new to stack overflow so im sorry if i dont post things properly.
The error states :Shapes.cpp:35:51: error: default argument given for parameter 2 of 'bool Point::draw(Screen&, char)' [-fpermissive]
bool Point::draw(Screen& scr, char ch = Screen::FG){
#ifndef Shapes_h
#define Shapes_h
#include<bits/stdc++.h>
using namespace std;
class Screen{
friend class Shape;
private:
size_t _w, _h;
vector<vector<char> > _pix;
public:
static const char FG = '*', BG = '.';
Screen(size_t w, size_t h){
_pix.resize(_h, vector<char>(_w));
}
size_t get_w() const{ return _w;}
size_t get_h() const{ return _h;}
vector<vector<char> >& get_pix() { return _pix;}
void set_w(size_t w) { _w = w;}
void set_h(size_t h) { _h = h;}
void clear() { fill(BG); }
void fill(char c); // todo cpp file
string to_string() const;
friend std::ostream& operator<<(std::ostream& os, const Screen& scr) {
return os << scr.to_string();
};
friend class Tests; // Don't remove this line
};
// SHAPE ---------------
class Shape{
public:
virtual ~Shape() {}
virtual bool draw(Screen& scr, char ch = Screen::FG) = 0;
friend class Tests;
};
class Point : public Shape {
private:
size_t _x, _y;
public:
Point(size_t x, size_t y) : _x(x), _y(y) {}
virtual ~Point() {}
bool draw(Screen& scr, char ch = Screen::FG);
friend class Tests;
};
class Line: public Shape{
private:
size_t _x1, _y1, _x2, _y2;
static bool draw_by_x(Screen& scr, char ch, size_t x1, size_t y1, size_t x2, size_t y2){
if (x2>x1){
draw_by_x(scr, ch, x2, y2, x1, y1);
}
double dy = (double) ((double)y2-y1)/((double)x2-x1);
size_t x = x1;
size_t y = y1;
bool boo = true;
for (size_t i = x1; i <= x2; i++){
boo &= Point((size_t)x, (size_t) y).draw(scr, ch);
x++; y += dy;
}
return boo;
}
static bool draw_by_y(Screen& scr, char ch, size_t x1, size_t y1, size_t x2, size_t y2){
if (x2>x1){
draw_by_x(scr, ch, x2, y2, x1, y1);
}
double dx = (double) ((double)x2-x1)/((double)y2-y1);
size_t x = x1;
size_t y = y1;
bool boo = true;
for (size_t i = x1; i <= x2; i++){
boo &= Point((size_t)x, (size_t) y).draw(scr, ch);
x += dx; y += 1;
}
return boo;
}
public:
Line(size_t a, size_t b, size_t c, size_t d) : _x1(a), _y1(b), _x2(c), _y2(d) {}
virtual ~Line() {}
bool draw(Screen& scr, char ch = Screen::FG);
friend class Tests; // Don't remove
};
class Quadrilateral: public Shape {
private:
size_t _x1, _y1, _x2, _y2, _x3, _y3, _x4, _y4;
public:
Quadrilateral(size_t a, size_t b, size_t c, size_t d, size_t e, size_t f, size_t g, size_t h):
_x1(a), _y1(b), _x2(c), _y2(d), _x3(e), _y3(f), _x4(g), _y4(h) {}
virtual ~Quadrilateral() {}
bool draw(Screen& scr, char ch = Screen::FG);
friend class Tests; // Don't remove
};
class Upright_Rectangle : public Quadrilateral {
public:
Upright_Rectangle(size_t x1,size_t y1, size_t x2, size_t y2) :
Quadrilateral(x1,y1, x1,y2, x2,y2, x2,y1) {}
virtual ~Upright_Rectangle() {}
};
class Stick_Man : public Shape{
static const size_t DEFAULT_W = 20, DEFAULT_H = 40;
private:
size_t _x, _y, _w, _h;
vector < Shape* > _parts;
public:
Stick_Man(size_t x = 0, size_t y = 0, size_t w = DEFAULT_W, size_t h = DEFAULT_H){
}
virtual ~Stick_Man();
const std::vector<Shape *>& get_parts() const { return _parts; }
bool draw(Screen& scr, char ch = Screen::FG){
return"";
}
friend class Tests; // Don't remove
};
#endif /* Shapes_h */
HERE IS MY CPP:
#include "Shapes.h"
#include<bits/stdc++.h>
#define pb push_back
using namespace std;
void Screen::fill(char c){
for (size_t i = 0; i < _h; i++){
for (size_t j = 0; j < _w; j++){
_pix[i][j] = c;
}
}
return;
}
void Screen::clear(char c){
fill(c);
return;
}
string Screen::to_string()const{
string ans = "";
for (size_t i = _h-1; i >= 0; i--){
for (size_t j = 0; j < _w; j++){
ans += _pix[i][j];
}
ans += "\n";
}
return ans;
}
bool Point::draw(Screen& scr, char ch = Screen::FG){
if (_x < scr.get_h() && _y < scr.get_w()){
scr.get_pix()[_x][_y] = ch;
return true;
}
return false;
}
bool Line::draw(Screen& scr, char ch = Screen::FG){
if (abs(int(_x1-_x2)) > abs(int(_y1-_y2))) return draw_by_x(scr, ch, _x1, _y1, _x2, _y2);
return draw_by_y(scr, ch, _x1, _y1, _x2, _y2);
}
bool Quadrilateral::draw(Screen& scr, char ch = Screen::FG){
size_t x1, y1, x2, y2, x3, y3, x4, y4;
x1 = _x1;
x2 = _x2;
x3 = _x3;
x4 = _x4;
y1 = _y1;
y2 = _y2;
y3 = _y3;
y4 = _y4;
if ((x2-x3)==(x4-x1) && (y2-y3)==(y4-y1)){
swap(x2, x3);
swap(y2, y3);
}
else if ((x4-x3)==(x2-x1) && (y4-y3)==(y2-y1)){
swap(x4, x3);
swap(y4, y3);
}
bool boo = true;
boo &= Line(x1, y1, x2, y2).draw(scr, ch);
boo &= Line(x2, y2, x3, y3).draw(scr, ch);
boo &= Line(x3, y3, x4, y4).draw(scr, ch);
return boo;
}
Remove the default parameter from the definition. Instead of
bool Point::draw(Screen& scr, char ch = Screen::FG){
if (_x < scr.get_h() && _y < scr.get_w()){
scr.get_pix()[_x][_y] = ch;
return true;
}
Change it to
bool Point::draw(Screen& scr, char ch){
if (_x < scr.get_h() && _y < scr.get_w()){
scr.get_pix()[_x][_y] = ch;
return true;
}
The reason you get the error is because you've already declared and defined it the draw function within the class here
bool draw(Screen& scr, char ch = Screen::FG){
return"";
}
Change this to
bool draw(Screen& scr, char ch = Screen::FG){
}
And you won't get the redefinition error anymore.
So I am testing out some object arrays in C++, and I am trying to delete these objects afterwards, as I am supposed to.
But here's the problem: the deleteInputPattern variable works fine, so I am able to fully delete "inputs" within the CSVFile header class, but its equivalent in the main file, "inputArray", triggers a breakpoint.
What is the problem here? Am I trying to delete non-existent memory? Do any of the pointers need
Code wall below:
InputTest.h:
#pragma once
class InputTest
{
private:
float r;
float g;
float b;
float t;
public:
InputTest();
~InputTest();
InputTest(float r, float g, float b, float t);
void setR(float newT);
float getR();
void setG(float newT);
float getG();
void setB(float newT);
float getB();
void setT(float newT);
float getT();
void print(int count);
};
InputTest.cpp:
#include "InputTest.h"
#include <stdio.h>
InputTest::InputTest()
{
printf("Input constructor\n");
}
InputTest::~InputTest()
{
printf("Input destructor\n");
}
InputTest::InputTest(float r, float g, float b, float t)
{
this->r = r;
this->g = g;
this->b = b;
this->t = t;
}
void InputTest::setR(float newT)
{
r = newT;
}
float InputTest::getR()
{
return r;
}
void InputTest::setG(float newT)
{
g = newT;
}
float InputTest::getG()
{
return g;
}
void InputTest::setB(float newT)
{
b = newT;
}
float InputTest::getB()
{
return b;
}
void InputTest::setT(float newT)
{
t = newT;
}
float InputTest::getT()
{
return t;
}
void InputTest::print(int count)
{
printf("R: %.2f\n", r);
printf("G: %.2f\n", g);
printf("B: %.2f\n", b);
printf("T: %.2f\n", t);
}
Copy.h:
#pragma once
class InputTest;
class Copy
{
private:
int patternCount;
InputTest** inputs;
public:
Copy();
~Copy();
InputTest* getInputPattern(int index);
void addInputPattern(InputTest* in);
void deleteInputPattern();
};
Copy.cpp:
#include "Copy.h"
#include "InputTest.h"
#include <string.h>
#include <stdio.h>
Copy::Copy()
{
printf("CSV File constructor\n");
inputs = NULL;
patternCount = 0;
inputs = new InputTest*[3];
int i;
for (i = 0; i < 3; i++)
{
inputs[i] = new InputTest();
}
}
Copy::~Copy()
{
printf("CSV File destructor\n");
}
InputTest * Copy::getInputPattern(int index)
{
printf("input gotten: %d\n", index);
return inputs[index];
}
void Copy::addInputPattern(InputTest * in)
{
inputs[patternCount] = in;
patternCount++;
printf("input added: %d\n", patternCount);
}
void Copy::deleteInputPattern()
{
int i;
for (i = 0; i < patternCount; i++)
{
delete inputs[i];
}
delete inputs;
inputs = NULL;
}
main.cpp:
#include "Copy.h"
#include "InputTest.h"
#include <string.h>
#include <stdio.h>
int main(int argv, char** argc)
{
bool testResult = false;
Copy *test = NULL;
test = new Copy();
InputTest **inputArray;
inputArray = new InputTest*[3];
int count;
for (count = 0; count < 3; count++)
{
inputArray[count] = new InputTest();
inputArray[count]->setR(0.2f);
inputArray[count]->setG(0.6f);
inputArray[count]->setB(0.8f);
inputArray[count]->setT(0.5f);
test->addInputPattern(inputArray[count]);
inputArray[count] = test->getInputPattern(count);
printf("next\n");
}
for (count = 0; count < 3; count++)
{
printf("round %d\n", count);
printf("R: %f\n", inputArray[count]->getR());
printf("G: %f\n", inputArray[count]->getG());
printf("B: %f\n", inputArray[count]->getB());
printf("T: %f\n", inputArray[count]->getT());
}
test->deleteInputPattern();
for (count = 0; count < 3; count++)
{
delete inputArray[count];
}
delete inputArray;
delete test;
inputArray = NULL;
test = NULL;
return testResult;
}
These seem to be the problematic line:
test->deleteInputPattern();
for (count = 0; count < 3; count++)
{
delete inputArray[count];
}
Since you have already deleted using test->deleteInputPattern(), that memory is freed. Now you are deleting the same objects (to which you are still holding a reference via inputArray) explicitly in main using delete inputArray. But that memory is already deleted in deleteInputPattern and hence you should be getting a memory access error.
You need to free any allocated memory only once. There is no need to do it again in main(). Either call deleteInputPattern or call delete explicitly in main, but not both. I can recommend 2 best practices:
Use smart pointers
The allocating module should delete the memory (this may not be applicable in many situations though such as factories)
I am using Cuda with C++ to do some parallel computing. Recently, I noticed something that I cannot understand and I didn't find informations about it when looking for it. In my code, one line which is very seldom exectued (but needed) slows down the program even when it is not executed at all. Here is some code to make it more clear:
The class I created:
class Foo
{
void myFunction(Foo *listFoo);
//some other functions that I need
...
int myAttribute;
//some other attributes that I need
...
}
The definition of myFunction:
void Foo::myFunction(Foo *listFoo)
{
//do some computations on the listFoo
if( condition seldom verified )
{ myAttribute = myAttribute + 1; }
}
The global function:
__global__ void compute(Foo *listFoo, int numberOfFoo)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
if( i < numberOfFoo)
{ listFoo[i].myFunction(listFoo); }
}
The host code:
compute<<<(numberOfFoo + 511)/512, 512>>> (listFoo, numberOfFoo)
The line slowing down everything is myAttribute = myAttribute + 1. Even when it is executed 0 times, the code is really slow compared to when the line is put in the comment. I tried to replace this line with a simple printf. The result is the same, the line is never executed but it slows down everything.
If you have any suggestion on the reason and on eventually how to solve this problem, it would be very much appreciated. My level in programing is not so advanced, so please use relatively easy explanations.
Thanks a lot
First Edit: few people requested the code, so here it is! I reduced it to 700 lines, I know it is still very long but not much would work if I keep removing some parts of it. It compiles without problems for me. All you have to do is press enter, wait few seconds and the time taken will be shown in the command window.
It is in the function findContactwithGrain() that the problem occurs. The line addContact(grainContact) is slowing down everything. On my computer, if this line is active, one computation takes around 3.5 sec. If I put it in comment, it takes 0.07 sec. That's a huge difference for one line that is never executed.
Hope this helps to understand the problem
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <fstream> // to read and write files
#include <stdio.h>
#include <iostream>
#include <time.h>
#include <string>
#include <sstream>
#define n 200
using namespace std;
int global_totalNumberBlock = 0;
int global_totalNumberGrain = 0;
//tell the compiler that those classes exist
class Vec3d2;
class Block;
class Grain;
class Contact;
class Analysis;
class Vec3d2
{
public:
__host__ __device__ Vec3d2(void);
__host__ __device__ Vec3d2(double x_value, double y_value, double z_value);
__host__ __device__ ~Vec3d2(void);
__host__ __device__ double dot(Vec3d2 a) const;
__host__ __device__ Vec3d2 cross(Vec3d2 a) const;
__host__ __device__ double norm() const;
__host__ __device__ void normalize();
// to be able to use cout easily
__host__ __device__ friend ostream & operator <<(ostream &s,const Vec3d2 &vec)
{
s << vec.x << endl;
s << vec.y << endl;
s << vec.z << endl;
return s;
}
//to be able to use brackets
__host__ __device__ double operator [](int i) const
{
if( i == 0)
{
return x;
}
else if( i == 1)
{
return y;
}
else if( i == 2)
{
return z;
}
else
{
cout << "ERROR IN USING VEC3D2" << endl;
system("PAUSE");
}
}
__host__ __device__ double & operator [](int i)
{
if( i == 0)
{
return x;
}
else if( i == 1)
{
return y;
}
else if( i == 2)
{
return z;
}
else
{
cout << "ERROR IN USING VEC3D2" << endl;
system("PAUSE");
}
}
//attributes
double x, y, z;
};
//Class Vec3d2 functions and operators
Vec3d2::Vec3d2()
{
x = 0;
y = 0;
z = 0;
}
Vec3d2::Vec3d2(double x_value, double y_value, double z_value)
{
x = x_value;
y = y_value;
z = z_value;
}
Vec3d2::~Vec3d2()
{
}
double Vec3d2::dot(Vec3d2 a) const
{
return x*a.x + y*a.y + z*a.z;
}
Vec3d2 Vec3d2::cross(Vec3d2 a) const
{
Vec3d2 result( y*a.z - z*a.y, x*a.z - z*a.x, x*a.y - y*a.x);
return result;
}
double Vec3d2::norm() const
{
return sqrt((double) x*x + y*y + z*z);
}
void Vec3d2::normalize()
{
double norm = this->norm();
if (norm > 0)
{
x = x/norm;
y = y/norm;
z = z/norm;
}
else //the vector has a null norm so nothing to do
{
}
}
__host__ __device__ Vec3d2 operator+(Vec3d2 const& a, Vec3d2 const& b)
{
return Vec3d2(a.x + b.x, a.y + b.y, a.z + b.z);
}
__host__ __device__ Vec3d2 operator-(Vec3d2 const& a, Vec3d2 const& b)
{
return Vec3d2(a.x - b.x, a.y - b.y, a.z - b.z);
}
__host__ __device__ Vec3d2 operator*(Vec3d2 const& a, double const& b)
{
return Vec3d2(b*a.x, b*a.y, b*a.z);
}
__host__ __device__ Vec3d2 operator*(double const& b, Vec3d2 const& a)
{
return Vec3d2(b*a.x, b*a.y, b*a.z);
}
__host__ __device__ Vec3d2 operator/(Vec3d2 const& a, double const& b)
{
return Vec3d2(a.x/b, a.y/b, a.z/b);
}
__host__ __device__ Vec3d2 operator/(double const& b, Vec3d2 const& a)
{
return Vec3d2(a.x/b, a.y/b, a.z/b);
}
__host__ __device__ bool operator==(Vec3d2 const& a, Vec3d2 const& b)
{
if(a.x == b.x && a.y == b.y && a.z == b.z)
{
return true;
}
else
{
return false;
}
}
__host__ __device__ bool operator!=(Vec3d2 const& a, Vec3d2 const& b)
{
if( a.x != b.x || a.y != b.y || a.z != b.z)
{
return true;
}
else
{
return false;
}
}
class Contact
{
public:
__host__ __device__ Contact(void);
//__host__ __device__ Contact(Contact const& ContactToCopy);
__host__ __device__ ~Contact(void);
__host__ __device__ void setContact(Grain &grain1, Grain &grain2, double overlap_value);
};
class Block
{
public:
__host__ Block(void);
__host__ Block(Block const& BlockToCopy);
__host__ __device__ ~Block(void);
__host__ __device__ Contact* getContactList() const;
__host__ __device__ Contact** getContactListPtr();
__host__ __device__ int getMaxNumberContact() const;
__host__ __device__ int getNumberContact() const;
__host__ __device__ void setContactList(Contact *ptr);
__host__ __device__ void addContact(Contact contact_value);
__host__ __device__ void clearContactList();// empty the contactList
__host__ __device__ void deleteBlockData(); //clear the memory taken by the contactList
__host__ __device__ Block& operator=(Block const& BlockToCopy);
protected:
int Id; //unique Id number for each entity double mass;
int totalNumberBlock; //same value for each block, cannot use static attribute because of cuda
Contact *contactList;
int numberContact, old_numberContact; //because there is no way to find it from the pointer contactList
int maxNumberContact; //maximum number of contact per block, we have to choose this
};
class Grain: public Block
{
public:
__host__ Grain(void);
__host__ Grain(Grain const& grainToCopy);
__host__ Grain(Vec3d2 position_value, double radius_value, double mass_value);
__host__ __device__ ~Grain(void);
__host__ __device__ Vec3d2 getPositionVec() const;
__host__ __device__ Vec3d2* getPosition() const;
__host__ __device__ Vec3d2** getPositionPtr();
__host__ __device__ int getTotalNumberGrain() const;
__host__ void setTotalNumberGrain();
__host__ __device__ void setTotalNumberGrain(int number);
__host__ __device__ void setPosition(Vec3d2 *ptr);
__host__ __device__ void setPositionVec(Vec3d2 position_value);
__host__ __device__ void deleteGrainData();
__host__ __device__ void findContactwithGrain(Grain *grainList);
__host__ __device__ Grain& operator=(Grain const& grainToCopy);
__host__ __device__ friend ostream & operator <<(ostream &s,const Grain &grain)
{
s <<"position is" << endl;
s << *grain.position << endl;
s <<"grain number is" << endl;
s << grain.number << endl;
s <<"radius is" << endl;
s << grain.radius << endl;
s <<"mass is" << endl;
return s;
}
private:
Vec3d2 *position;
int totalNumberGrain;
int number; //different from Id defined in class Block because a wall could have the same number as a grain
double radius;
};
class Analysis
{
public:
Analysis(void);
Analysis(Grain *grainList);
~Analysis(void);
Grain* getGrainList();
void copyToDevice();
void copyToHost();
void runAnalysis();
private:
//should contain grainList, wallList and their equivalent for the device
//should contain an array of pointers for each attribute being a pointer in grain and wall and their equivalent in the device
int totalNumberGrain, totalNumberWall;
Grain *grainList, *d_grainList;
//for grain data
Contact **grain_contactList, **d_grain_contactList;
Vec3d2 **grain_position, **d_grain_position;
};
//class Contact functions
Contact::Contact(void)
{
}
Contact::~Contact(void)
{
}
void Contact::setContact(Grain &grain1, Grain &grain2, double overlap_value)//we are in grain1 and contact with grain2
{
}
//class Block functions
Block::Block(void)
{
Id = global_totalNumberBlock;
numberContact = 0;
old_numberContact = 0;
//contact list settings
maxNumberContact = 30;
contactList = new Contact[maxNumberContact];
//increment of block number
global_totalNumberBlock = global_totalNumberBlock + 1;
}
Block::~Block(void)
{
delete[] contactList;
//cout << "CAREFUL, YOU ARE DESTROYING A BLOCK" << endl;//because we should never erase a block
//system("PAUSE");
totalNumberBlock = totalNumberBlock - 1;
}
Block::Block(Block const& BlockToCopy)
{
Id = BlockToCopy.Id;
numberContact = BlockToCopy.numberContact;
old_numberContact = BlockToCopy.old_numberContact;
maxNumberContact = BlockToCopy.maxNumberContact;
contactList = new Contact[maxNumberContact];
for(int i =0; i <numberContact; i++)
{
contactList[i] = BlockToCopy.contactList[i];
}
}
Contact* Block::getContactList() const
{
return contactList;
}
Contact** Block::getContactListPtr()
{
return &contactList;
}
int Block::getMaxNumberContact() const
{
return maxNumberContact;
}
int Block::getNumberContact() const
{
return numberContact;
}
void Block::setContactList(Contact *ptr)
{
//no "delete contactList" here because this is executed after cuda. The contactList is pointing to nothing and deleteing it will cause an error
contactList = ptr;
}
void Block::addContact(Contact contact_value)
{
if(numberContact < maxNumberContact)
{
contactList[numberContact] = contact_value;
numberContact = numberContact + 1;
}
else //find a way to throw an error because the list is too small for all the contacts
{
printf("TOO MANY CONTACTS ON ONE GRAIN");
}
}
void Block::clearContactList()
{
//delete[] contactList;
//contactList = new Contact[maxNumberContact];
if(numberContact > 0)
{
numberContact = 0;
}
}
void Block::deleteBlockData()
{
delete[] contactList;
}
__host__ __device__ Block& Block::operator=(Block const& BlockToCopy)
{
if(this != &BlockToCopy) //to check we are not doing a = a
{
Id = BlockToCopy.Id;
numberContact = BlockToCopy.numberContact;
old_numberContact = BlockToCopy.old_numberContact;
maxNumberContact = BlockToCopy.maxNumberContact;
delete[] contactList;
contactList = new Contact[maxNumberContact];
for(int i =0; i <numberContact; i++)
{
contactList[i] = BlockToCopy.contactList[i];
}
}
return *this;
}
//class Grain functions
Grain::Grain(void)
{
number = global_totalNumberGrain;
global_totalNumberGrain = global_totalNumberGrain + 1;
totalNumberGrain = -1;//safety
//initialize Vec3d2
position = new Vec3d2;
}
Grain::Grain(Grain const& grainToCopy)
{
cout <<"COPY CONSTRUCTOR OF GRAIN IS NOT DONE YET"<<endl;
system("PAUSE");
//totalNumberGrain = grainToCopy.totalNumberGrain;
//radius = grainToCopy.radius;
//diameter = grainToCopy.diameter;
//volume = grainToCopy.volume;
//inertia = grainToCopy.inertia;
//position = new Vec3d2;
//old_position = new Vec3d2;
//old_velocity = new Vec3d2;
//old_acceleration = new Vec3d2;
//old_angularVelocity = new Vec3d2;
//old_angularAcceleration = new Vec3d2;
//gravityForce = new Vec3d2;
//*position = *grainToCopy.position;
//*old_position = *grainToCopy.old_position;
//*old_velocity = *grainToCopy.old_velocity;
//*old_acceleration = *grainToCopy.old_acceleration;
//*old_angularVelocity = *grainToCopy.old_angularVelocity;
//*old_angularAcceleration = *grainToCopy.old_angularAcceleration;
//*gravityForce = *grainToCopy.gravityForce;
}
Grain::Grain(Vec3d2 position_value, double radius_value,double mass_value)//, number(totalNumberGrain)
{
number = global_totalNumberGrain;
global_totalNumberGrain = global_totalNumberGrain + 1;
totalNumberGrain = -1;//safety
radius = radius_value;
//initialize all the Vec3d2 parameters
position = new Vec3d2;
*position = position_value;
}
Grain::~Grain(void)
{
//cout << "CAREFUL, YOU ARE DESTROYING A GRAIN" << endl;//because we should never erase a block
//system("PAUSE");
totalNumberGrain = totalNumberGrain - 1;
delete position;
}
Vec3d2 Grain::getPositionVec() const
{
return *position;
}
Vec3d2* Grain::getPosition() const
{
return position;
}
Vec3d2** Grain::getPositionPtr()
{
return &position;
}
int Grain::getTotalNumberGrain() const
{
return totalNumberGrain;
}
void Grain::setTotalNumberGrain()
{
totalNumberGrain = global_totalNumberGrain;
}
void Grain::setTotalNumberGrain(int number)
{
totalNumberGrain = number;
}
void Grain::setPosition(Vec3d2 *ptr)
{
position = ptr;
}
void Grain::setPositionVec(Vec3d2 position_value)
{
*position = position_value;
}
void Grain::deleteGrainData()
{
delete position;
}
void Grain::findContactwithGrain(Grain *grainList)
{
for(int m = 0; m < n; m++)
{
double length;
length = (*position - (*grainList[m].position)).norm();
if( length < radius + grainList[m].radius)
{
if( number != grainList[m].number) //faster than number != sortedGrainList[m]
{
Vec3d2 relativePosition = *position - (*grainList[m].position) ;
double overlap = radius + grainList[m].radius - relativePosition.norm();
//define the contact
Contact grainContact;
grainContact.setContact(*this, grainList[m], overlap);
addContact(grainContact); //IF YOU PUT THIS LINE IN COMMENT, EVERYTHING GOES A LOT FASTER
}
}
}
}
__host__ __device__ Grain& Grain::operator=(Grain const& grainToCopy)
{
if(this != &grainToCopy)
{
Block::operator=(grainToCopy); //this lines call the operator = defined for Block. So it copies the block attributes of the first grain into the second grain
//totalNumberGrain = grainToCopy.totalNumberGrain;
radius = grainToCopy.radius;
*position = *grainToCopy.position;
}
return *this;
}
//class Analysis functions
Analysis::Analysis(void)
{
}
Analysis::Analysis(Grain *grainList_value)
{
totalNumberGrain = grainList_value[0].getTotalNumberGrain();
grainList = new Grain[totalNumberGrain];
//copy grains
for(int i = 0; i < totalNumberGrain; i++)
{
grainList[i] = grainList_value[i];
grainList[i].setTotalNumberGrain(grainList_value[i].getTotalNumberGrain());
}
}
Analysis::~Analysis(void)
{
delete[] grainList;
//a lot more delete should be made here
}
Grain* Analysis::getGrainList()
{
return grainList;
}
void Analysis::copyToDevice()
{
//declare device grainList and wallList and copy the values
cudaMalloc(&d_grainList, totalNumberGrain*sizeof(Grain));
cudaMemcpy(d_grainList, grainList, totalNumberGrain*sizeof(Grain), cudaMemcpyHostToDevice);
////declare device list of pointer to pass pointer values of grain
d_grain_contactList = new Contact*[totalNumberGrain];
d_grain_position = new Vec3d2*[totalNumberGrain];
for(int i = 0; i < totalNumberGrain; i++)
{
cudaMalloc(&d_grain_contactList[i], grainList[i].getMaxNumberContact()*sizeof(Contact));
cudaMalloc(&d_grain_position[i], sizeof(Vec3d2));
}
//copy pointers and values for grains
for(int i = 0; i < totalNumberGrain; i++)
{
//pointers
cudaMemcpy(d_grainList[i].getContactListPtr(), &d_grain_contactList[i], sizeof(Contact*), cudaMemcpyHostToDevice);
cudaMemcpy(d_grainList[i].getPositionPtr(), &d_grain_position[i], sizeof(Vec3d2*), cudaMemcpyHostToDevice);
//values
cudaMemcpy(d_grain_contactList[i], grainList[i].getContactList(), grainList[i].getMaxNumberContact()*sizeof(Contact), cudaMemcpyHostToDevice);
cudaMemcpy(d_grain_position[i], grainList[i].getPosition(), sizeof(Vec3d2), cudaMemcpyHostToDevice);
}
}
void Analysis::copyToHost()
{
//delete the pointer value or it will create a memory leak
for(int i = 0; i < totalNumberGrain; i++)
{
grainList[i].deleteBlockData();
grainList[i].deleteGrainData();
}
//copy non pointer value
cudaMemcpy(grainList, d_grainList, totalNumberGrain*sizeof(Grain),cudaMemcpyDeviceToHost);
//copy pointer values for grains
grain_contactList = new Contact*[totalNumberGrain];
grain_position = new Vec3d2*[totalNumberGrain];
for(int i = 0; i < totalNumberGrain; i++)
{
grain_contactList[i] = new Contact[grainList[i].getMaxNumberContact()];
grain_position[i] = new Vec3d2;
grainList[i].setContactList(grain_contactList[i]);
grainList[i].setPosition(grain_position[i]);
cudaMemcpy(grain_contactList[i], d_grain_contactList[i], grainList[i].getMaxNumberContact()*sizeof(Contact), cudaMemcpyDeviceToHost);
cudaMemcpy(grain_position[i], d_grain_position[i], sizeof(Vec3d2), cudaMemcpyDeviceToHost);
}
}
__global__ void compute( Grain *g)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
//__syncthreads();
if( i < n )
{
g[i].findContactwithGrain(g);
}
}
void Analysis::runAnalysis()
{
for(int i = 0; i < 3; i ++)
{
clock_t begin = clock();
for(int j = 0; j < 10000; j++)
{
compute<<<(n + 511)/512, 512>>>(d_grainList);
}
clock_t end = clock();
cout << (double)(end-begin)/CLOCKS_PER_SEC << endl;
system("PAUSE");
}
}
int main(void)
{
//grain
Vec3d2 position1; position1[0] = 0;position1[1] = 0;position1[2] = 0;
double radius1 = 1;
////cuda
cout << "PRESS ENTER TO START" << endl;
system("PAUSE");
clock_t begin = clock();
Grain *g, *d_g;
g = new Grain[n];
for(int i = 0; i<n; i++)
{
g[i].setTotalNumberGrain();
}
Grain g1(position1, radius1, 0.1);
for(int i = 0; i <n; i++)
{
g[i] = g1;
g[i].setPositionVec(Vec3d2(3*i+1.5, 1.5, 0));
}
Analysis a(g);
a.copyToDevice();
a.runAnalysis();
clock_t end = clock();
cout << (double)(end-begin)/CLOCKS_PER_SEC << endl;
return 0;
}
I would need more code to verify but, the most likely explanation is that when you do not include code you are actually not writing any data to global memory. When you don't write anything to global memory nvcc will optimize just about everything out to the point where you will be running just about nothing.
The same is true when you include a print statement. Print statements are viewed as output, therefore nvcc can't compile out code that it is dependent on.
For example:
__global__ empty_kernel(int* huge_array, int num_elements){
int local_memory;
for(int i=0; i<num_elements; i++){
local_memory+=huge_array[i];
}
}
will run faster than:
__global__ empty_kernel(int* small_array, int num_elements, int* smaller_array){
int tid = ThreadIdx.x+BlockIdx.x*BlockDim.x;
int local_memory;
for(int i=0; i<5; i++){
local_memory+=huge_array[tid*i];
}
smaller_array[tid]=local_memory;
}
The bottom line being, your first kernel isn't faster, it just isn't being run.
The problem in my opinion is simply the if statement, not the statement that it conditionally executes. Conditional branching can be quite expensive on GPU architectures (though it seems to get better with newer architectures), and just having a branching statement could definitely slow down your code.
If you remove the statement within the if clause, the compiler sees that no code is left and therefore can optimize also the if itself away. So this is why you see the speedup when you remove this line of code.
I'm trying to fill a vector of an object Point 3D. My app read a csv file to load the vector by the three cordinate x, y, z. I use the type float.
This is my code.
main.cpp
int main(int argc, char** argv) {
char *theFileName = "file.csv"; //[100];
vector<Point> v = getPointCloud(theFileName);
for (int i = 0; i < v.size(); ++i) {
v.at(i).print(cout);
}
}
getPointCloud
vector<Point> getPointCloud(char *fileName) {
string line;
string token;
vector<Point> v;
double tab[3];
ifstream file(fileName);
if (file.is_open()) {
while (getline(file, line)) {
int cpt = 0;
stringstream stream(line);
while (getline(stream, token, ',')) {
tab[cpt] = ::atof(token.c_str());
cpt++;
}
Point p(tab[0], tab[1], tab[2]);
p.print(cout); <-- the display works
p.setColor(255, 0, 0);
v.push_back(p);
}
file.close();
} else {
cout << "Unable to open " << fileName << '\n';
exit(0);
}
return v;
}
I have two problems:
1 - when I try to display points in the main method, I found that the three coordinates are null ( == 0) but in the displaying in the getPointCloud method works very well.
2 - Can someone give a simple method to conserve my coordinates without loss precision after mathematical operations. I have searched in the net but I don't understand haw to solve it. I'm newbie with c++.
Point.h
#ifndef POINT_H
#define POINT_H
#include <math.h>
#include <iostream>
class Point {
protected:
float x;
float y;
float z;
// color RGB
float r;
float g;
float b;
public:
// Constructors
Point();
// Point(const Point& orig);
Point(std::ostream &strm);
Point(float x, float y, float z);
Point(const Point& orig);
virtual ~Point();
//getters
float getX() const {
return this->x;
}
float getY() const {
return this->y;
}
float getZ() const {
return this->z;
}
float getR() const {
return this->r;
}
float getG() const {
return this->g;
}
float getB() const {
return this->b;
}
//setters
void setX(float x) {
this->x = x;
}
void setY(float y) {
this->y = y;
}
void setZ(float z) {
this->z = z;
}
void setR(float r) {
this->r = r;
}
void setG(float g) {
this->g = g;
}
void setB(float b) {
this->b = b;
}
void setColor(float r, float g, float b) {
this->r = r;
this->g = g;
this->b = b;
}
/**
* Print the point
* #param strm
*/
void print(std::ostream &strm);
//Other methods
float dist2D(Point &other);
float dist3D(Point &other);
Point swap(Point p);
// Point operator-(const Point &other) const;
};
#endif /* POINT_H */
Point.cpp
#include <iostream>
#include <math.h>
#include <ostream>
using namespace std;
#include "Point.h"
Point::Point(const Point& orig) {
}
Point::Point(ostream &strm) {
strm << "Type the abscissa: ", cin >> this->x;
strm << "Type the ordinate: ", cin >> this->y;
strm << "Type the applicate: ", cin >> this->z;
}
Point::Point(float x, float y, float z) : x(x), y(y), z(z) {
// The default point color is blue
this->r = 0;
this->g = 0;
this->b = 255;
}
/**
* Destructor
*/
Point::~Point() {
}
//Other methods
float Point::dist2D(Point &other) {
float xd = x - other.x;
float yd = y - other.y;
return sqrt(xd * xd + yd * yd);
}
float Point::dist3D(Point &other) {
float xd = x - other.x;
float yd = y - other.y;
float zd = z - other.z;
return sqrt(xd * xd + yd * yd + zd * zd);
}
Point Point::swap(Point p) {
Point aux(x, y, z);
x = p.x;
y = p.y;
z = p.z;
return aux;
}
//Point Point::operator-(const Point &other) const {
// return Point(other.getX() - this->x, other.getY() - this->y, other.getZ() - this->z);
//}
void Point::print(ostream &strm) {
strm << "Point(" << this->x << "," << y << "," << z << ")" << endl;
}
Thanks in advance.
Point::Point(const Point& orig) {
}
is incorrect.
It does not copy data from orig to *this
Please copy each of the member in this constructor.
This would look like this:
Point::Point(const Point& orig) {
x = orig.x ;
y = orig.y ;
x = orig.z ;
r = orig.r ;
g = orig.g ;
b = orig.b ;
}