I'm working on programming my own little game which should have a visibility effect as described here. My world consists of Polygons which each have a list of Edges (sorted CW). I now want (as described in the article) to cast Rays towards the Edges of the polygons, find the intersections and retrieve a Polygon that defines the visible area.
So I wrote classes for Vectors, Points, Edges and Polygons and adjusted the intersection algorithm so it works with my code.
I then tested it and everything worked fine, but when I ran the intersection algorithm in a for-loop to simulate a large number of Edges being processed (starting with 100, up to 1000), the fps dropped drastically: with 100 Edges "only" 300 fps (3000 before), and with 300 it dropped below 60, I think. This seems like far too large a drop to me, because I wanted to reuse this code for my light sources; I think I would quickly end up processing many more than 300 Edges, and it should run fast on much less powerful processors (I have a Xeon e1230v3).
I figured out that only calling the EdgeIntersection the program runs many times faster, but I definitely need to loop through the Edges in my polygons so this is no option.
My Source-Code:
Vector.h/.cpp: Basic Vector class with two floats(X,Y), getters&setters, rotating
Vertex.h/.cpp: Basic Point class with a Position Vector, getters&setters and a boolean that indicates whether it is a Intersection Vertex
Edge.h/.cpp Basic Edge class with start/end-Verticies, getters&setters and rotating function(uses Vector.rotate())
Polygon.h:
#pragma once
#include <vector>
#include "Edge.h"
namespace geo
{
// A polygon stored as a list of Edge objects.
class Polygon
{
private:
// The edges that make up this polygon.
std::vector<Edge> edges;
public:
Polygon();
// Takes the edge list by value.
Polygon(std::vector<Edge> edges);
~Polygon();
// NOTE(review): returns the edge list BY VALUE, so each call copies the
// whole vector. Calling this inside a loop copies it on every iteration.
std::vector<Edge> getEdges();
// Returns one Edge by value; presumably the edge at `index` - confirm in Polygon.cpp.
Edge getEdge(int index);
int getEdgeCount();
void setEdges(std::vector<Edge> edges);
void setEdge(Edge e, int index);
void addEdge(Edge e);
void removeEdge(int index);
};
}
Ray.h:
#pragma once
#include "Vertex.h"
// A ray described by an origin vertex and a direction vector.
class Ray
{
private:
// Start point of the ray.
geo::Vertex origin;
// Direction of the ray; nothing in this header requires it to be normalised.
geo::Vector dir;
public:
Ray();
Ray(geo::Vertex origin, geo::Vector dir);
~Ray();
// Both getters return by value (a copy per call) and are not const-qualified.
geo::Vertex getOrigin();
geo::Vector getDirection();
void setOrigin(geo::Vertex origin);
void setDirection(geo::Vector dir);
};
LightModule.h:
#pragma once
#include "Polygon.h"
#include "Ray.h"
// Casts rays against a set of blocking polygons; intended to produce the
// polygon of the visible area (updateVisible() is still empty in LightModule.cpp).
class LightModule
{
private:
// List of blocking polygons. Stored as a raw pointer supplied by the caller;
// this class does not allocate the vector itself.
std::vector<geo::Polygon>* blockingPolygons;
// Rays appended by updateRays().
std::vector<Ray> rays;
geo::Polygon bounds;
geo::Polygon visible;
/*geo::Polygon blocked;*/
// The two intersection helpers below are meant to move into a separate
// hit-detection class later.
// Intersection of a ray with a single edge.
geo::Vertex getIntersection(Ray r, geo::Edge* e);
// Nearest intersection of a ray with any edge of a polygon.
geo::Vertex getClosestIntersection(Ray r, geo::Polygon *p);
public:
LightModule();
LightModule(std::vector<geo::Polygon>* blockingPolygons);
~LightModule();
// Set the blocking polygons (stores the pointer).
void setBlockingPolygons(std::vector<geo::Polygon>* blockingPolygons);
// Public test wrappers around the private intersection helpers.
geo::Vertex callCI(Ray r, geo::Polygon* p);
geo::Vertex callI(Ray r, geo::Edge* e);
// Cast rays towards vertices and store them in `rays`.
void updateRays();
// Update the visibility polygon.
void updateVisible();
// Return the visibility polygon.
geo::Polygon* getVisible();
};
LightModule.cpp:
#include "LightModule.h"
// Default constructor: no blocking polygons attached yet.
// The pointer is set to nullptr explicitly so it never holds an indeterminate
// value before setBlockingPolygons() is called. `rays` is default-constructed
// empty, so no clear() is needed.
LightModule::LightModule()
    : blockingPolygons(nullptr)
{
}
// Constructs a module that refers to the caller's list of blocking polygons.
// Only the pointer is stored; `rays` starts out empty because it is
// default-constructed.
LightModule::LightModule(std::vector<geo::Polygon>* blockingPolygons)
    : blockingPolygons(blockingPolygons)
{
}
// Destructor. The body is empty: nothing is released here, including the
// blockingPolygons pointer.
LightModule::~LightModule()
{
}
// Replaces the stored pointer to the list of blocking polygons.
// Only the pointer is copied, not the polygons it points to.
void LightModule::setBlockingPolygons(std::vector<geo::Polygon>* blockingPolygons)
{
this->blockingPolygons = blockingPolygons;
}
//Test-cast a Ray (will follow mouse in the Test)
void LightModule::updateRays()
{
Ray r(geo::Vertex(geo::Vector(200, 100)), geo::Vector(-100, 0));
rays.push_back(r);
}
// Placeholder: the body is empty, so the visibility polygon is not updated yet.
void LightModule::updateVisible()
{
}
// Test-only public wrapper (to be moved to a separate class later):
// forwards to the private getClosestIntersection().
geo::Vertex LightModule::callCI(Ray r, geo::Polygon *p)
{
    return getClosestIntersection(r, p);
}
// Test-only public wrapper: forwards to the private getIntersection().
geo::Vertex LightModule::callI(Ray r, geo::Edge* e)
{
    return getIntersection(r, e);
}
//TEST
// Intersects the ray `r` with the edge `*e`.
//
// The ray is  rayOrigin + rayDir * T1  with T1 >= 0, and the edge is the
// segment  edgeOrigin + edgeDir * T2  with T2 in [0, 1].
//
// Returns a Vertex whose isIntersectVert() flag is true and whose position is
// the hit point, or a Vertex flagged false when there is no intersection
// (parallel lines, hit behind the ray origin, hit outside the segment, or a
// null edge pointer).
geo::Vertex LightModule::getIntersection(Ray r, geo::Edge* e)
{
    geo::Vertex v;
    v.setIntersectVert(false);
    if (!e)
    {
        return v;
    }

    const float r_px = r.getOrigin().getPosition().getX();
    const float r_py = r.getOrigin().getPosition().getY();
    const float r_dx = r.getDirection().getX();
    const float r_dy = r.getDirection().getY();
    const float s_px = e->getOrigin().getPosition().getX();
    const float s_py = e->getOrigin().getPosition().getY();
    const float s_dx = e->getDirection().getX();
    const float s_dy = e->getDirection().getY();

    // 2D cross product of the two directions. It is zero exactly when the
    // directions are parallel OR anti-parallel, or when one of them has zero
    // length. Testing it directly needs no sqrt and catches every case in
    // which the divisions below would divide by zero.
    const float denom = r_dx * s_dy - r_dy * s_dx;
    if (denom == 0.0f)
    {
        return v;
    }

    // Vector from the ray origin to the edge origin.
    const float dx = s_px - r_px;
    const float dy = s_py - r_py;

    // Solve  rayOrigin + rayDir*T1 == edgeOrigin + edgeDir*T2  by Cramer's
    // rule. Unlike dividing by r_dx, this also works for vertical rays.
    const float T1 = (dx * s_dy - dy * s_dx) / denom;
    const float T2 = (dx * r_dy - dy * r_dx) / denom;

    if (T1 < 0 /*|| T1 > 1 For Lines*/)
    {
        return v; // intersection lies behind the ray origin
    }
    if (T2 < 0 || T2 > 1)
    {
        return v; // intersection lies outside the edge segment
    }

    v.setIntersectVert(true);
    v.setPosition(geo::Vector(r_px + r_dx * T1, r_py + r_dy * T1));
    return v;
}
// Returns the intersection of ray `r` with polygon `*p` that lies closest to
// the ray origin.
//
// The returned Vertex has isIntersectVert() == true when at least one edge is
// hit. Otherwise (no hit, no edges, or null polygon) a Vertex at (0, 0)
// flagged false is returned.
geo::Vertex LightModule::getClosestIntersection(Ray r, geo::Polygon *p)
{
    geo::Vertex v_nearest(geo::Vector(0, 0));
    v_nearest.setIntersectVert(false);
    if (!p)
    {
        return v_nearest;
    }

    // Polygon::getEdges() returns the vector by value. Fetch it once instead
    // of copying the whole edge list twice per loop iteration.
    std::vector<geo::Edge> edges = p->getEdges();

    // The ray origin does not change inside the loop.
    const float originX = r.getOrigin().getPosition().getX();
    const float originY = r.getOrigin().getPosition().getY();

    bool found = false;
    float nearestDistSq = 0.0f;
    for (std::size_t i = 0; i < edges.size(); ++i)
    {
        geo::Vertex v = getIntersection(r, &edges[i]);
        if (!v.isIntersectVert())
        {
            continue; // a miss must never become the "nearest" candidate
        }

        // Squared distance orders points the same way as distance and
        // avoids a sqrt per edge.
        const float dx = v.getPosition().getX() - originX;
        const float dy = v.getPosition().getY() - originY;
        const float distSq = dx * dx + dy * dy;

        if (!found || distSq < nearestDistSq)
        {
            found = true;
            nearestDistSq = distSq;
            v_nearest = v;
        }
    }
    return v_nearest;
}
For testing, I create a Polygon and a LightModule, call updateRays(), and then call the helper function callCI().
I know my code gets pretty messy when i have to cascade my getters and setters, ill have to fix that but for the Rest i hope everything is understandable and if not feel free to ask. And just to have mentioned it, I Test-draw my Objects with Vertex-Arrays but I don't need Graphical output of the intersection process, i just need the visible polygon.
Just to point out again: I need a faster way of finding the Intersection-Point between a Ray and a Polygon and as I didn't know if i did something wrong in my code I posted it all here so someone can maybe help me making my code more efficient or show me a different method to solve my problem.
Have a nice day and thank you for your answers :)
Paul
EDIT: Would it be meaningfully faster to first triangulate my polygons and then do a Ray-Triangle intersection Test?
I can't speak to the algorithm (which is possibly what you need) but some immediate thoughts on speeding up what you have.
First off you can define all your getters and setters inline (put them in the class in the header, not the separate source file) so the compiler can optimize the function calls away.
Then these changes might buy you a few frames:
// make sure your getters and setters are inline so the compiler
// can optimize them away
//
// Returns the intersection of ray `r` with polygon `*p` that is closest to the
// ray origin, or a Vertex at (0, 0) with isIntersectVert() == false when no
// edge is hit.
geo::Vertex LightModule::getClosestIntersection(Ray r, geo::Polygon* p)
{
    geo::Vertex v_nearest(geo::Vector(0, 0));
    v_nearest.setIntersectVert(false);

    // getEdges() returns the vector by value, so take ONE copy up front
    // instead of copying the whole edge list on every access in the loop
    std::vector<geo::Edge> edges = p->getEdges();

    // cache the ray origin (note the geo:: qualification: LightModule is
    // not inside namespace geo)
    geo::Vector ray_position = r.getOrigin().getPosition();
    const float origin_x = ray_position.getX();
    const float origin_y = ray_position.getY();

    bool found = false;
    float nearest_dist_sq = 0.0f;

    // size_t index: no signed/unsigned comparison, no at() bounds check
    for (std::size_t i = 0; i < edges.size(); ++i)
    {
        geo::Vertex v = this->getIntersection(r, &edges[i]);

        // a miss must not be used as the reference for distance comparisons
        if (!v.isIntersectVert())
            continue;

        // compare squared lengths: same ordering, no sqrt
        const float dx = v.getPosition().getX() - origin_x;
        const float dy = v.getPosition().getY() - origin_y;
        const float dist_sq = dx * dx + dy * dy;

        if (!found || dist_sq < nearest_dist_sq)
        {
            found = true;
            nearest_dist_sq = dist_sq;
            v_nearest = v;
        }
    }
    return v_nearest;
}
I removed one of the if statements by calculating the 0 item before the loop and starting the loop from 1, the rest is just caching a much used value and avoiding at() which is slower because it does bound-checking.
Related
I am currently mapping a Graph to a Minesweeper like grid, where every Block represents a node.
Here is my Graph class:
// Drawable grid of Node objects stored row by row: data[row][column].
class Graph : public sf::Drawable
{
public:
    // Builds a grid with numNodesHeight rows of numNodesWidth nodes each.
    Graph(uint32_t numNodesWidth, uint32_t numNodesHeight);

    // Unchecked access to row `i`.
    [[nodiscard]] std::vector<Node> & operator[](std::size_t i)
    { return data[i]; }

    // Returns {number of rows, number of columns}.
    // An empty grid reports {0, 0} instead of reading data[0], which would
    // be out of range when there are no rows.
    [[nodiscard]] sf::Vector2u dimension() const
    {
        const uint32_t rows = static_cast<uint32_t>(data.size());
        const uint32_t cols = data.empty() ? uint32_t{0}
                                           : static_cast<uint32_t>(data.front().size());
        return {rows, cols};
    }
...
...
private:
    std::vector<std::vector<Node>> data;
};
here is the implementation of the constructor:
// Creates numNodesHeight rows and then gives every row numNodesWidth
// default-constructed nodes.
Graph::Graph(uint32_t numNodesWidth, uint32_t numNodesHeight)
    : data(numNodesHeight)
{
    for (std::size_t rowIndex = 0; rowIndex < data.size(); ++rowIndex)
    {
        data[rowIndex].resize(numNodesWidth);
    }
}
Somewhere in another class I read mouse coordinates and convert them to "Graph Coordinates":
// Convert the mouse position (relative to `window`) to grid coordinates.
// The result is unsigned; toGraphCoords() returns {row, column}.
sf::Vector2u translatedCoords = toGraphCoords(sf::Mouse::getPosition(window), nodeSize_);
// Compare against graph.dimension(), which is {rows, columns}.
bool inBounds = checkGraphBounds(translatedCoords, graph.dimension());
Here are the helper functions:
// Converts a mouse position in pixels to grid coordinates {row, column}
// (x = mouse.y / nodeSize.y, y = mouse.x / nodeSize.x).
//
// Positions left of or above the origin, and non-positive node sizes, yield
// {UINT32_MAX, UINT32_MAX}, which any bounds check against real dimensions
// rejects. Without this guard a negative quotient would be cast to an unsigned
// type: that is undefined for values <= -1, and values in (-1, 0) silently
// truncate to cell 0 although the cursor is outside the grid.
sf::Vector2u toGraphCoords(sf::Vector2i mouseCoord, sf::Vector2f nodeSize)
{
    const uint32_t outside = static_cast<uint32_t>(-1);
    if (mouseCoord.x < 0 || mouseCoord.y < 0 ||
        nodeSize.x <= 0.f || nodeSize.y <= 0.f)
    {
        return {outside, outside};
    }
    return {static_cast<uint32_t>(mouseCoord.y / nodeSize.y),
            static_cast<uint32_t>(mouseCoord.x / nodeSize.x)};
}
// Returns true when the (already translated) grid coordinate lies inside
// `bounds`. Both arguments are unsigned, so a ">= 0" test is always true and
// carries no information; only the upper bounds are checked.
bool checkGraphBounds(sf::Vector2u mouseCoord, sf::Vector2u bounds)
{
    return mouseCoord.x < bounds.x &&
           mouseCoord.y < bounds.y;
}
Somehow I get the vector subscript out of range 1655 error when I try to use these new checked Coordinates which is somehow strange, can someone explain to me what I am doing wrong. This error always shows when I try to hover beyond the "Bounds" of the Interactive area, slightly behind or in front the first or the last Node.
Thanks in advance.
There is no guarantee that bounds <= num_nodes * node_size. This is especially risky since there are integer divisions involved, which means that you are at the mercy of rounding.
You could shuffle code around until such a guarantee is present, but there's a better way.
If the checkGraphBounds() function operated on the same math that the grid does, you could be sure that the result would be consistent with grid, no matter how that relates to the bounds.
The ideal way to do so would be to actually use toGraphCoords() as part of it:
// Returns true when the raw mouse position maps to a node that exists in
// `graph`. The position is translated with the same toGraphCoords() that is
// later used for indexing, so the check and the lookup cannot disagree.
bool checkGraphBounds(sf::Vector2u mouseCoord, const Graph& graph,
                      sf::Vector2f nodeSize)
{
    // toGraphCoords() takes sf::Vector2i, and SFML's Vector2 converting
    // constructor is explicit, so the conversion has to be spelled out.
    const sf::Vector2u coord = toGraphCoords(static_cast<sf::Vector2i>(mouseCoord), nodeSize);
    const sf::Vector2u dim = graph.dimension();
    // coord is unsigned, so there is no lower bound to test.
    return coord.x < dim.x &&
           coord.y < dim.y;
}
With this, you can formally guarantee that if a mouseCoord passes that test, static_cast<uint32_t>(mouseCoord.x / nodeSize.x) will always yield a value strictly less than the corresponding graph dimension, so it is a valid index.
Personally, I would combine both functions as a method of Graph like so:
class Graph : public sf::Drawable {
    // Make nodeSize a member of the Graph
    sf::Vector2f nodeSize_;
    // This is one of the cases where caching an inferable value is worth it.
    sf::Vector2u dimensions_;

public:
    // Translates a mouse position to grid coordinates {row, column}.
    // Returns std::nullopt when the position does not fall on a node.
    std::optional<sf::Vector2u> toGraphCoords(sf::Vector2i mouseCoord) const {
        // Reject negative positions (and unusable node sizes) before the
        // cast: converting a negative floating-point value to an unsigned
        // integer is not a valid way to detect "outside".
        if (mouseCoord.x < 0 || mouseCoord.y < 0 ||
            nodeSize_.x <= 0.f || nodeSize_.y <= 0.f) {
            return std::nullopt;
        }

        sf::Vector2u coord{
            static_cast<uint32_t>(mouseCoord.y / nodeSize_.y),
            static_cast<uint32_t>(mouseCoord.x / nodeSize_.x)};

        // No need to compare against 0, we are dealing with unsigned ints
        if (coord.x < dimensions_.x &&
            coord.y < dimensions_.y) {
            return coord;
        }
        return std::nullopt;
    }
    // ...
};
Usage:
// Click handler: acts only when the mouse position maps to a node of the_graph.
void on_click(sf::Vector2i mouse_loc) {
    if (auto hit = the_graph.toGraphCoords(mouse_loc)) {
        sf::Vector2u graph_coord = *hit;
        // ...
    }
}
Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 1 year ago.
Improve this question
I'm programming a Breakout game in C++. I'm having a HUGE problem that's preventing me from giving the game multi-ball functionality. I think it has something to do with the destructor. Have a look:
for loop for the balls (Driver.cpp):
// NOTE(review): this range-based for iterates `balls` while the body erases
// from the same vector.
for (Ball& b : balls) { // Loops over all balls
(...)
// Collision for when you miss
if (b.getYPos() > HEIGHT) { // If the ball falls below the defined height of the screen
// NOTE(review): erase() modifies the vector the enclosing loop is traversing,
// and the element is located via b.getID() used as an index.
balls.erase(balls.begin() + b.getID()); // Wipe the ball out of memory to make room (Troublesome line)
Ball::addToID(-1); // Shift the ball ID to assign to the next ball back one
(...)
}
And I get this error:
Debug Error!
Program: Breakout.exe
abort() has been called
(Press Retry to debug the application)
Do you know why this mysterious crash is happening? Or more importantly, a fix for it?
Here's a replicable piece of code to help:
Driver.cpp:
#include <algorithm>
#include <vector>
#include <allegro5\allegro.h>
#include "Ball.h"
using namespace std;
vector<Ball> balls(0); // Pay attention to this line
const POS WIDTH = 1600, HEIGHT = 900;
int main {
while (running) {
if (al_key_down(&key, ALLEGRO_KEY_SPACE)) { // Spawn the ball
balls.push_back(Ball(WIDTH / 2, 500, 10, 10)); // Spawn the ball
balls[Ball::getIDtoAssign()].setYSpeed(5);
}
for (Ball& b : balls) { // Pay attention to this loop
b.draw(); // This line is what's crashing.
b.move();
(...)
// Collision for when you miss
balls.erase(
remove_if(balls.begin(), balls.end(),
[=](Ball& b) {
// Collision for when you miss
return b.getYPos() > HEIGHT; // If the ball falls below the defined height of the screen, wipe the ball out of memory to make room
}
),
balls.end()
);
}
}
}
}
return 0;
}
Ball.h:
#pragma once
#include <allegro5\allegro_primitives.h>
using namespace std;
// A rectangular ball with a position, a size and a per-step speed.
class Ball {
public:
    Ball();
    Ball(float x, float y, float w, float h);
    ~Ball();
    // Draws the ball as a filled rectangle.
    void draw();
    // Advances the position by the current speed.
    void move();
    float getYPos();
    void setYSpeed(float set);
private:
    float xPos; // Horizontal position
    float yPos; // Vertical position (upside down)
    float width; // Sprite width
    float height; // Sprite height
    float xSpeed; // Horizontal speed
    float ySpeed; // Vertical speed (inverted)
}; // the terminating semicolon is required after a class definition
Ball.cpp:
#include "Ball.h"
// NOTE(review): the Ball class as shown in Ball.h declares no static member
// named ballIDtoAssign, so this definition has nothing to match - either
// declare it in the class or remove this line.
short Ball::ballIDtoAssign = 0;
// Default ball: zero position, zero size, zero speed.
Ball::Ball()
    : xPos(0), yPos(0), width(0), height(0), xSpeed(0), ySpeed(0) {
}
// Ball at (x, y) with size w by h; it starts at rest.
Ball::Ball(float x, float y, float w, float h)
    : xPos(x), yPos(y), width(w), height(h), xSpeed(0), ySpeed(0) {
}
// Destructor; the body is empty, nothing is released here.
Ball::~Ball() {
// Destructor
}
void Ball::draw() {
al_draw_filled_rectangle(xPos, yPos, xPos + width, yPos + height, al_map_rgb(0xFF, 0xFF, 0xFF));
}
// Advances the ball by one step: adds the current speed to the position.
void Ball::move() {
xPos += xSpeed;
yPos += ySpeed;
}
// Returns the current vertical position.
float Ball::getYPos() {
return yPos;
}
// Sets the vertical speed that move() adds to yPos on each call.
void Ball::setYSpeed(float set) {
ySpeed = set;
}
You cannot modify a container while you are iterating through it with a range-for loop. You don't have access to the iterator that the loop uses internally, and erase() will invalidate that iterator.
You can use the container's iterators manually, paying attention to the new iterator that erase() returns, eg:
// Iterate manually so erase() can hand back the next valid iterator.
for (auto iter = balls.begin(); iter != balls.end(); ) { // Loops over all balls
    Ball& b = *iter;
    ...
    // Collision for when you miss
    if (b.getYPos() > HEIGHT) { // If the ball falls below the defined height of the screen
        ...
        // erase() invalidates `iter` (and `b`); continue from the iterator
        // it returns, which refers to the element after the erased one.
        iter = balls.erase(iter); // Wipe the ball out of memory to make room
    }
    else {
        ++iter; // advance only when nothing was erased
    }
}
Alternatively, use the erase-remove idiom via std::remove_if() instead:
balls.erase(
std::remove_if(balls.begin(), balls.end(),
[=](Ball &b){
// Collision for when you miss
return b.getYPos() > HEIGHT; // If the ball falls below the defined height of the screen, wipe the ball out of memory to make room
}
),
balls.end()
);
UPDATE: now that you have posted more of your code, it is clear to see that you are trying to use ID numbers as indexes into the vector, but you are not implementing those IDs correctly, and they are completely unnecessary and should be eliminated.
The Ball::ballID member is never being assigned any value, so in this statement:
balls.erase(balls.begin() + b.getID()); // The troublesome line
Trying to erase() the result of balls.begin() + b.getID() causes undefined behavior since the iterator has an indeterminate value, thus you can end up trying to erase the wrong Ball object, or even an invalid Ball object (which is likely the root cause of your runtime crash).
Also, in this section of code:
balls.push_back(Ball(WIDTH / 2, 500, 10, 10)); // Spawn the ball
balls[Ball::getIDtoAssign()].setYSpeed(5);
Ball::addToID(1);
Since you want to access the Ball object you just pushed, that code can be simplified to this:
balls.back().setYSpeed(5);
And I already gave you code further above to show you how to remove balls from the vector without using IDs.
So, there is no need for an ID system at all.
With that said, try something more like this:
Driver.cpp:
#include <vector>
...
#include "Ball.h"
using namespace std;
vector<Ball> balls;
const POS WIDTH = 1600, HEIGHT = 900;
// Game loop without any ball-ID bookkeeping: the newest ball is reached with
// back(), and missed balls are removed through the iterator erase() returns.
int main() {
    ...
    while (running) {
        ...
        if (input.type == ALLEGRO_EVENT_TIMER) { // Runs at 60FPS
            ...
            if (al_key_down(&key, ALLEGRO_KEY_SPACE)) { // Spawn the ball
                balls.push_back(Ball(WIDTH / 2, 500, 10, 10)); // Spawn the ball
                balls.back().setYSpeed(5);
            }
            for (auto iter = balls.begin(); iter != balls.end(); ) {
                Ball &b = *iter;
                ...
                if (b.getYPos() > HEIGHT) { // Collision for when you miss
                    // erase() returns the iterator following the removed ball
                    iter = balls.erase(iter);
                }
                else {
                    ++iter;
                }
            }
            /* alternatively:
            for (Ball& b : balls) {
                b.draw();
                b.move();
            }
            balls.erase(
                std::remove_if(balls.begin(), balls.end(),
                    [=](Ball &b){
                        // Collision for when you miss
                        return b.getYPos() > HEIGHT; // If the ball falls below the defined height of the screen, wipe the ball out of memory to make room
                    }
                ),
                balls.end()
            );
            */
        }
    }
    return 0;
}
Ball.h:
#pragma once
...
// Ball without any ID members or methods.
class Ball {
public:
    ...
    // NO ID METHODS!
private:
    ...
    // NO ID MEMBERS!
}; // a class definition must end with a semicolon
Ball.cpp:
#include "Ball.h"
...
// NO ID MEMBER/METHODS!
OK, so I managed to figure out why the program crashes. It was because I had the erase-remove inside the for loop which can cause all sorts of problems.
I am trying to implement a painter's sort algorithm for a rendering assignment. The premise of the code is that I need to find the average depth of each polygon and then sort the list of polygons by the depth assigned to them in the for loop.
this is the polygons declaration, as well as a collection of the vertices of the polygon post transformation which are used for the calculation of the depth of the polygon
// Polygons of the model; this is the vector that Model::Sort() reorders.
std::vector<Polygon3D> _polygons;
// Vertices after transformation; Sort() reads component 2 of these to
// compute each polygon's depth.
std::vector<Vertex> _transvertices;
This is the method called by the model class to sort the _polygons vector using std::sort
void Model::Sort()
{
for (int i = 0; i <= GetPolygonCount(); i++)
{
_polygons[i].SetDepth((_transvertices[_polygons[i].GetIndex(0)].Get(2) + _transvertices[_polygons[i].GetIndex(1)].Get(2) + _transvertices[_polygons[i].GetIndex(2)].Get(2)) / 3);
}
sort(_polygons.begin(), _polygons.end(), sortByDepth);
}
This code then links to this binary predicate
// Ordering predicate for std::sort: true when lhs has the greater depth, so
// polygons end up sorted by descending depth.
// (The unused debugging copies of the two depths have been removed.)
bool sortByDepth(const Polygon3D &lhs, const Polygon3D &rhs)
{
    return lhs.GetDepth() > rhs.GetDepth();
}
The issue is that once the sort algorithm starts, the values of lhs and rhs never change: lhs always has a depth of 0 (and looking further into its assignment, it seems to be creating an entirely new polygon?) and rhs always has a value of 30.53 (the depth of the first polygon in the _polygons vector).
I'm concerned that the issue might be with not having a form of iterator linked to the Polygon3D class, but I wouldn't know where to start with making an iterator for the class.
Any help would be appreciated, I've looked through far too many similar questions, but none of them seem to be quite right for my particular problem.
EDIT:
Post got taken down because I didn't provide enough code apparently. I tried to reproduce the problem in a different project but for some reason it iterates just fine there.
This is the "shortest possible reproduction" I tried, but for some reason this doesn't seem to have the same issue as the original.
#include <vector>
#include <algorithm>
// Minimal polygon for the reproduction: it only carries the average depth
// that the sort compares.
class Polygon3D
{
public:
    Polygon3D(); // Example data for testing purposes
    float GetDepth() const;
    void SetDepth(float depth);
private:
    // Initialised here so GetDepth() never reads an indeterminate value when
    // a polygon is constructed (or copied) before SetDepth() is called.
    float _depthAverage = 0.0f;
};
// Owns the polygons of the reproduction and sorts them by depth.
class Model
{
public:
// Fills the model with polygons and sorts them (see Model::Model below).
Model();
// NOTE(review): the definition below returns _polygons.size() - 1, not the size.
size_t GetPolygonCount() const;
// Assigns depths and sorts _polygons with sortByDepth.
void Sort();
private:
std::vector<Polygon3D> _polygons;
// The two int vectors are not used by any code shown in this reproduction.
std::vector<int> _vertices;
std::vector<int> _transvertices;
};
// Starts with a defined depth of 0 so the member is never read while
// indeterminate; Model::Sort() assigns the real depth later.
Polygon3D::Polygon3D()
    : _depthAverage(0.0f)
{
    //_depthAverage = float(rand() % 100);
}
// Returns the stored average depth.
float Polygon3D::GetDepth() const
{
return _depthAverage;
}
// Stores `depth` as this polygon's average depth.
void Polygon3D::SetDepth(float depth)
{
_depthAverage = depth;
}
// Builds a model of ten default-constructed polygons and sorts it right away.
Model::Model()
{
    for (int remaining = 10; remaining > 0; --remaining)
    {
        _polygons.push_back(Polygon3D());
    }
    Sort();
}
// Returns the index of the LAST polygon (size - 1), not the number of polygons.
// NOTE(review): size() is unsigned, so for an empty vector this wraps around
// to a huge value. Model::Sort() loops with "<=" against this result, so the
// two have to be changed together if this is ever made to return the size.
size_t Model::GetPolygonCount() const
{
return _polygons.size() - 1;
}
// Ordering predicate for std::sort: true when lhs is deeper than rhs, which
// sorts the polygons by descending depth.
// The two unused local copies of the depths were dropped.
bool sortByDepth(const Polygon3D& lhs, const Polygon3D& rhs)
{
    return lhs.GetDepth() > rhs.GetDepth();
}
void Model::Sort()
{
for (int i = 0; i <= GetPolygonCount(); i++)
{
_polygons[i].SetDepth(float(rand() % 100) / 3);
}
sort(_polygons.begin(), _polygons.end(), sortByDepth);
}
// Entry point of the reproduction: constructing the Model fills and sorts it.
int main()
{
    Model m;
}
Edit 2:
I played around with using an auto variable to iterate over _polygons manually, and that seems to work. I don't understand why std::sort doesn't.
auto begin = _polygons.begin();
while(true)
{
begin++;
}
The answer turned out to be something incredibly stupid on my own part. The issue was the copy constructor used within the Polygon3D class - I had forgotten to copy over the depth value in the copy constructor, which meant lhs did not get a depth value.
Currently I am working on porting a molecular dynamics simulation program, which was written in plain cpu C++, to Cuda. In short, the program initialises a list of atoms, transfers the control to an object of class CCalc which calculates atomic forces, velocities and positions for 100 (or another number of) iterations, and finally returns to draw the atoms on the screen.
My goal is to have all compute-heavy functions in CCalc run on the gpu. To prevent having to copy all calculation constants in CCalc one by one, I decided to copy the whole class to device memory, pointed to by this__d. Since the drawing function is called from the cpu, the atom list needs to be copied between cpu and gpu every 100 iterations and as such is not a member of CCalc.
In function CCalc::refreshCellList(), I want to rearrange atoms__d (the atom list residing in device memory) such that all atoms in the same cell are grouped together. In other words, atoms__d needs to be sorted with cellId as keys.
As I don't want to waste time implementing my own sorting algorithm, I tried using thrust::sort_by_key(). And here's where I got stuck. The function thrust::sort_by_key() requires device_ptr objects as arguments; however I cannot access cellId since I can only cast this__d to device_ptr, which I can't dereference on the cpu.
Is there a way to do this without having to break down the "class on gpu" structure?
Here is (an excerpt of) my code:
#include "cuda.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h"
#include <vector>
#include <thrust\sort.h>
#include <thrust\device_ptr.h>
#define REFRESH_CELL_LISTS 20
// Per-atom state. Arrays of this struct are copied between host and device
// with cudaMemcpy, so the struct is treated as plain bytes.
struct Atom
{
float pos[3];
float vel[3];
float force[3];
// others
};
// Host-side atom list.
std::vector<Atom> atom;
// Device-side copy of the atom list; allocated and freed inside
// CCalc::relaxStructure().
Atom *atom__d;
// Number of atoms. CCalc copies this value into nAtoms when it is constructed.
int noOfAtoms = 0;
// Forward declarations: the kernel takes a CCalc*, and CCalc launches the kernel.
class CCalc;
__global__ void makeCells(CCalc *C, Atom *A);
// Calculation object that keeps a byte-for-byte copy of itself in device
// memory (this__d) so kernels can read its constants through one pointer.
// NOTE(review): none of the CUDA runtime calls below check their return status.
class CCalc
{
private:
// Device address of the copy of this object made in the constructor.
CCalc *this__d;
public:
// Snapshot of the global noOfAtoms taken at construction time.
const int nAtoms = noOfAtoms;
// Device array with one int per atom; written by the makeCells kernel.
int *cellId;
const int nCellX = 4, nCellY = 3;
// many force calculation constants
CCalc()
{
cudaMalloc((void**)&cellId, nAtoms*sizeof(int));
// some other stuff
cudaMalloc((void**)&this__d, sizeof(CCalc));
// Copy the whole object to the device. This happens after cellId has been
// allocated, so the device copy holds the device pointer as well.
cudaMemcpy(this__d, this, sizeof(CCalc), cudaMemcpyHostToDevice);
}
// destructor
// Uploads the host atom list, runs numOfIterations iterations (refreshing the
// cell lists whenever iter is a multiple of REFRESH_CELL_LISTS), downloads the
// atoms again and frees the device buffer.
void relaxStructure(int numOfIterations)
{
cudaMalloc((void**)&atom__d, nAtoms*sizeof(Atom));
cudaMemcpy(atom__d, &atom[0], nAtoms*sizeof(Atom), cudaMemcpyHostToDevice);
for(int iter = 0; iter < numOfIterations; iter++)
{
// stuff
if(!(iter % REFRESH_CELL_LISTS)) refreshCellLists();
// calculate forces; update velocities and positions
}
cudaMemcpy(&atom[0], atom__d, nAtoms*sizeof(Atom), cudaMemcpyDeviceToHost);
cudaFree(atom__d);
}
// functions for force, velocity and position calculation
// Recomputes cellId for every atom on the device and waits for the kernel.
void refreshCellLists()
{
// 32 threads per block, enough blocks to cover nAtoms (rounded up).
makeCells<<<(nAtoms + 31) / 32, 32>>>(this__d, atom__d);
cudaDeviceSynchronize();
// sort atom__d array using cellId as keys;
// here is where I would like to use thrust::sort_by_key()
}
};
// Kernel: one thread per atom writes that atom's cell id into C->cellId.
// Threads whose global index is not below C->nAtoms do nothing.
// A is not read here.
__global__ void makeCells(CCalc *C, Atom *A)
{
    const int tid = blockDim.x*blockIdx.x + threadIdx.x;
    if(tid >= C->nAtoms)
        return;

    // determine cell x, y based on position
    // for now let's use an arbitrary mapping to obtain x, y
    const int cellX = (tid * tid) % C->nCellX;
    const int cellY = (tid * tid) % C->nCellY;
    C->cellId[tid] = cellX + cellY * C->nCellX;
}
// Selects device 0, sizes the host atom list, then alternates forever between
// drawing and 100 relaxation iterations.
int main()
{
    cudaSetDevice(0);
    noOfAtoms = 1000; // normally defined by input file
    atom.resize(noOfAtoms);
    // initialise atom positions, velocities and forces
    CCalc calcObject;
    for(;;) // as long as we need
    {
        // draw atoms on screen
        calcObject.relaxStructure(100);
    }
}
Thank you very much.
In other words, atoms__d needs to be sorted with cellId as keys.
It should be possible to do that, at your indicated point in the refreshCellLists method. For simplicity, I have chosen to use the raw device pointers directly (although we could easily wrap these raw device pointers in thrust::device_ptr also) combined with the thrust::device execution policy. Here is a worked example:
$ cat t1156.cu
#include <vector>
#include <thrust/execution_policy.h>
#include <thrust/sort.h>
#include <thrust/device_ptr.h>
#define REFRESH_CELL_LISTS 20
// State of one atom: position, velocity and force, three floats each.
// Arrays of Atom are transferred with cudaMemcpy and reordered on the device
// by thrust::sort_by_key as the value sequence.
struct Atom
{
float pos[3];
float vel[3];
float force[3];
// others
};
// Atom list in host memory.
std::vector<Atom> atom;
// Raw device pointer to the atom list; valid only between the cudaMalloc and
// cudaFree calls inside CCalc::relaxStructure().
Atom *atom__d;
// Atom count; read once by CCalc when it initialises nAtoms.
int noOfAtoms = 0;
// Declared up front because CCalc and the kernel refer to each other.
class CCalc;
__global__ void makeCells(CCalc *C, Atom *A);
// Calculation object mirrored into device memory. refreshCellLists() sorts the
// device atom array by cell id using raw device pointers together with the
// thrust::device execution policy.
// NOTE(review): the CUDA runtime calls below do not check their return status.
class CCalc
{
private:
// Device address of the copy of this object created in the constructor.
CCalc *this__d;
public:
// Copied from the global noOfAtoms when the object is constructed.
const int nAtoms = noOfAtoms;
// Raw device pointer: one cell id per atom, used as the sort keys.
int *cellId;
const int nCellX = 4, nCellY = 3;
// many force calculation constants
CCalc()
{
cudaMalloc((void**)&cellId, nAtoms*sizeof(int));
// some other stuff
cudaMalloc((void**)&this__d, sizeof(CCalc));
// Byte copy of *this to the device, made after cellId has been allocated.
cudaMemcpy(this__d, this, sizeof(CCalc), cudaMemcpyHostToDevice);
}
// destructor
// Uploads the atoms, iterates (refreshing the cell lists whenever iter is a
// multiple of REFRESH_CELL_LISTS), downloads the atoms and frees the buffer.
void relaxStructure(int numOfIterations)
{
cudaMalloc((void**)&atom__d, nAtoms*sizeof(Atom));
cudaMemcpy(atom__d, &atom[0], nAtoms*sizeof(Atom), cudaMemcpyHostToDevice);
for(int iter = 0; iter < numOfIterations; iter++)
{
// stuff
if(!(iter % REFRESH_CELL_LISTS)) refreshCellLists();
// calculate forces; update velocities and positions
}
cudaMemcpy(&atom[0], atom__d, nAtoms*sizeof(Atom), cudaMemcpyDeviceToHost);
cudaFree(atom__d);
}
// functions for force, velocity and position calculation
// Recomputes the cell ids on the device, then groups atoms of the same cell.
void refreshCellLists()
{
makeCells<<<(nAtoms + 31) / 32, 32>>>(this__d, atom__d);
cudaDeviceSynchronize();
// sort atom__d array using cellId as keys;
// Keys are cellId[0 .. nAtoms), values start at atom__d. thrust::device tells
// Thrust to treat the raw pointers as device memory.
thrust::sort_by_key(thrust::device, cellId, cellId+nAtoms, atom__d);
}
};
// Kernel: each thread with a global index below C->nAtoms stores a cell id
// for "its" atom in C->cellId. The Atom array A is not accessed.
__global__ void makeCells(CCalc *C, Atom *A)
{
    const int atomIdx = blockDim.x*blockIdx.x + threadIdx.x;
    if(atomIdx >= C->nAtoms)
        return;

    // determine cell x, y based on position
    // for now let's use an arbitrary mapping to obtain x, y
    const int squared = atomIdx * atomIdx;
    C->cellId[atomIdx] = (squared % C->nCellX) + (squared % C->nCellY) * C->nCellX;
}
// Test driver: 100 rounds of 100 relaxation iterations on 1000 atoms.
int main()
{
    cudaSetDevice(0);
    noOfAtoms = 1000; // normally defined by input file
    atom.resize(noOfAtoms);
    // initialise atom positions, velocities and forces
    CCalc calcObject;
    int round = 0;
    while (round < 100) // as long as we need
    {
        // draw atoms on screen
        calcObject.relaxStructure(100);
        round++;
    }
}
$ nvcc -std=c++11 -o t1156 t1156.cu
$ cuda-memcheck ./t1156
========= CUDA-MEMCHECK
========= ERROR SUMMARY: 0 errors
$
When building thrust codes, especially on windows, I usually make a set of recommendations as summarized here.
My Code works for my purely glut implementation, but I am trying to get it to work in qt.
I have a vector of masspoints for a wire mesh system
std::vector<masspoint> m_particles;
The problem is in my qt version none of what I write really sticks and I am left with an array of zeros. Basically I am confused why the glut version has correct values but the qt one does not given that it is basically identical code. What is wrong with the qt code?
Yes I only see zeros when using qDebug. When I am calling my drawing function in the qt version all vertex points turn out to be 0 in all components so nothing is seen.
// Debug flag: while non-zero, the Cloth constructor prints each particle's x
// position. The constructor sets it to 0 when it has finished printing.
int myboog = 1;
// Running counter printed next to each debug line.
int county = 0;
// Constructors
// Builds a grid of particles_in_width by particles_in_height masspoints
// spread over a width-by-height area, then computes the initial normals.
Cloth::Cloth(float width, float height, int particles_in_width, int particles_in_height):
    m_width(particles_in_width),
    m_height(particles_in_height),
    m_dimensionWidth(width),
    m_dimensionHeight(height),
    m_distanceX(width/(float)particles_in_width),
    m_distanceY(height/(float)particles_in_height)
{
    // m_particles holds one masspoint per grid cell, addressed as
    // [y * m_width + x].
    m_particles.resize(m_width*m_height);
    qDebug() << m_particles.size();

    // Create the point masses that simulate the cloth.
    for (int x = 0; x < m_width; ++x)
    {
        for (int y = 0; y < m_height; ++y)
        {
            // Border particles are lifted by half a cell height so the wind
            // has more effect as the cloth falls.
            const bool onBorder = (x == 0) || (x == m_width - 1)
                               || (y == 0) || (y == m_height - 1);

            Vector3f position = Vector3f(m_dimensionWidth * (x / static_cast<float>(m_width)),
                                         onBorder ? m_distanceY/2.0f : 0,
                                         m_dimensionHeight * (y / static_cast<float>(m_height)));

            // Every new masspoint gets the gravity acceleration.
            m_particles[y * m_width + x] = masspoint(position, Vector3f(0,-0.06,0));
        }
    }

    // One-time debug dump of every particle's x position.
    if (myboog)
    {
        const int total = (int)m_particles.size();
        for (int idx = 0; idx < total; ++idx)
        {
            masspoint* current = &m_particles[idx];
            qDebug() << "test " << *current->getPosition().getXLocation() << county;
            county++;
        }
    }
    myboog = 0;

    // Calculate the normals for the first time so the initial draw is correctly lit
    calculateClothNormals();
}
Code for masspoint involved in constructor for CLoth
#ifndef MASSPOINT_H
#define MASSPOINT_H
#include <QGLWidget>
#include "vector3f.h"
class masspoint
{
private:
Vector3f m_position; // Current Location of the pointmass
Vector3f m_velocity; // Direction and speed the pointmass is traveling in
Vector3f m_acceleration; // Speed at which the pointmass is accelerating (used for gravity)
Vector3f m_forceAccumulated; // Force that has been accumulated since the last update
Vector3f m_normal; // Normal of this pointmass, used to light the cloth when drawing
float m_damping; // Amount of velocity lost per update
bool m_stationary; // Whether this pointmass is currently capible of movement
public:
masspoint& operator= (const masspoint& particle);
//Some constructors
masspoint();
masspoint(const masspoint& particle);
masspoint(Vector3f position, Vector3f acceleration);
//Like eulur integration
void integrate(float duration);
// Accessor functions
//Get the position of the point mass
inline Vector3f getPosition() const {return m_position;}
Vector stuff involved in the constructor for CLoth
#ifndef VECTOR3F_H
#define VECTOR3F_H
#include <math.h>
// Vector library to be used
class Vector3f
{
private:
float m_x, m_y, m_z;
public:
const float* getXLocation() const { return &m_x; }