Check neighbouring voxels efficiently in 3D array of size n - c++

My voxel system uses a flat 3D array dynamically allocated at runtime for each chunk, however generating millions of cubes per chunk isn't feasible so I need to optimize.
The first optimization I intend to implement is of course to not generate mesh data for occluded voxels, this is a good idea on paper but I don't know how to do it.
All my attempts have ended up with hard to debug memory allocation issues and as such I have to throw the towel in and ask more knowledgeable people as I'm at loss.
My current incarnation of this is as such
// Chunk meshing loop from the question: visits every voxel of the chunk and
// decides whether a cube should be emitted for it.
int8_t x = 0, y= 0, z = 0;
const int MAX = CHUNKSIZE-1;
const int MIN = 0;
// NOTE(review): the start/end neighbour bounds below are computed exactly once,
// while x == y == z == 0, and are never refreshed inside the loops. With a
// coordinate of 0 each start bound evaluates to 0, i.e. to MIN.
int8_t sPosX = (x - 1 < MIN) ? x : x-1;
int8_t sPosY = (y - 1 < MIN) ? y : y-1;
int8_t sPosZ = (z - 1 < MIN) ? z : z-1;
int8_t ePosX = (x + 1 > MAX) ? x : x+1;
int8_t ePosY = (y + 1 > MAX) ? y : y+1;
int8_t ePosZ = (z + 1 > MAX) ? z : z+1;
// a, b, c are copies of the start bounds; c is also forwarded to createCube().
int8_t a=sPosX, b=sPosY, c=sPosZ;
// Flag set when the current voxel should get a cube in the non-hack branch.
int8_t add = 0;
BlockType BT = BT_grass;
scene::SMesh* mesh = new scene::SMesh();
// NOTE(review): the loop counters are int8_t; presumably CHUNKSIZE is at most
// 128, otherwise `x <= MAX` could never become false -- confirm.
for(x = 0; x <= MAX; x++)
{
for(y = 0; y <= MAX; y++)
{
for(z = 0; z <= MAX; z++)
{
cm = b_blocks[x][y][z].material;
//b_blocks[x][y][z].setFlags(0xFE, BT);
// NOTE(review): the index here is [x][x][z], not [x][y][z] as everywhere else
// in this loop -- looks like a typo; confirm.
if( !b_blocks[x][x][z].isActive() )
{
continue;
}
else
{
// sPosX was fixed to MIN before the loops started, so this branch is taken for
// every active voxel and a cube is created for each of them.
if(sPosX == MIN)
{
createCube(x,y,z,c,mesh,cm);
}
else
{
// a, b, c and the end bounds never change inside the loops, so these three
// comparisons have the same outcome for every voxel.
if(a<=ePosX)
{
if(b<=ePosY)
{
if(c<=ePosZ)
{
printf("x %d, y %d, z %d\n", x, y, z);
// NOTE(review): this re-tests the current voxel [x][y][z], which is already
// known to be active here (modulo the index typo above), not a neighbour.
if(!b_blocks[x][y][z].isActive())
{
add = 1;
}
}
}
}
if(add == 1)
{
createCube(x,y,z,c,mesh,cm);
add = 0;
}
}
}
}
}
}
The if(sPosX == MIN) check is a hack I implemented to avoid segfaulting while generating the chunk (otherwise it segfaults with a memory access violation on generating block[CHUNKSIZE][CHUNKSIZE][CHUNKSIZE]), which isn't very nice.
This hack inadvertently makes sure that all cubes are generated however and is just as unappealing.
The concise questions here are as follows:
What part of my logic is broken? (presumably all of it) and how would one properly check neighbouring blocks in a fast manner that does not cause an out of bounds error? (I tried by manually coding exceptions for every last corner case but that proved to be unmaintainable and was orders of magnitude slower and prone to segfaulting)

I would use something like:
/// Fixed-size 3D grid of blocks addressed by chunk-relative coordinates.
/// All extents are powers of two so that world positions can be split into
/// "chunk base" and "position inside the chunk" with simple bit masks.
class BlockChunk final
{
public:
    // log2 of the chunk extent along each axis. The Y/Z names previously used
    // a lowercase "shift" here but an uppercase "Shift" at the use sites below,
    // so the class did not compile.
    static constexpr int sizeXShift = 4, sizeYShift = 8, sizeZShift = 4;
    static constexpr int sizeX = 1 << sizeXShift; // 2 ** sizeXShift
    static constexpr int sizeY = 1 << sizeYShift;
    static constexpr int sizeZ = 1 << sizeZShift;
    static constexpr int sizeXRelativeMask = sizeX - 1; // mask to get position mod sizeX (faster than % because negative inputs to % return negative answers which need more adjusting whereas masking always returns the positive answer)
    static constexpr int sizeYRelativeMask = sizeY - 1;
    static constexpr int sizeZRelativeMask = sizeZ - 1;
    static constexpr int sizeXChunkBaseMask = ~sizeXRelativeMask; // mask to get position - relativePosition (aka chunk base position)
    static constexpr int sizeYChunkBaseMask = ~sizeYRelativeMask;
    static constexpr int sizeZChunkBaseMask = ~sizeZRelativeMask;
private:
    Block blocks[sizeX][sizeY][sizeZ];
public:
    /// Position of this chunk's (0, 0, 0) block.
    const PositionI basePosition;
    BlockChunk(PositionI basePosition)
        : basePosition(basePosition)
    {
    }
    /// Returns the block at a chunk-relative position.
    /// @param relative  each component must lie in [0, size) for its axis.
    Block &at(PositionI relative)
    {
        assert(relative.x >= 0 && relative.x < sizeX);
        assert(relative.y >= 0 && relative.y < sizeY);
        assert(relative.z >= 0 && relative.z < sizeZ); // these asserts are important for finding out-of-bounds bugs
        return blocks[relative.x][relative.y][relative.z];
    }
    /// Masks each component of p down to its position inside a chunk.
    static PositionI getRelativePosition(PositionI p)
    {
        p.x &= sizeXRelativeMask;
        p.y &= sizeYRelativeMask;
        p.z &= sizeZRelativeMask;
        return p;
    }
    /// Clears the in-chunk bits of each component of p, leaving the chunk base.
    static PositionI getChunkBasePosition(PositionI p)
    {
        p.x &= sizeXChunkBaseMask;
        p.y &= sizeYChunkBaseMask;
        p.z &= sizeZChunkBaseMask;
        return p;
    }
};
class BlockIterator;
/// Owns the chunks of the world, keyed by chunk base position.
/// Chunk lookup is private; BlockIterator reaches it through friendship.
class BlockWorldBase
{
    friend class BlockIterator;
private:
    std::unordered_map<PositionI, std::shared_ptr<BlockChunk>> chunks;
    /// Returns the chunk stored under chunkBasePosition, creating it first if
    /// the map has no chunk for that key yet.
    BlockChunk *getOrMakeChunk(PositionI chunkBasePosition)
    {
        // operator[] inserts an empty pointer for an unseen key.
        auto &slot = chunks[chunkBasePosition];
        if(!slot)
        {
            slot = std::make_shared<BlockChunk>(chunkBasePosition);
        }
        return slot.get();
    }
};
class BlockWorld;
/// Cursor onto a single block of the world. It remembers the chunk it is in and
/// the position inside that chunk, and switches chunks when moved across a
/// chunk border. Instances are created by BlockWorld (private constructor).
class BlockIterator final
{
    friend class BlockWorld;
private:
    BlockChunk *chunk;
    BlockWorldBase *world;
    PositionI chunkBasePosition, relativePosition;
    /// Re-resolves `chunk` from `chunkBasePosition` (creating it if needed).
    void updateChunk()
    {
        chunk = world->getOrMakeChunk(chunkBasePosition);
    }
    BlockIterator(BlockWorldBase *world, PositionI position)
        : chunk(),
        world(world),
        chunkBasePosition(BlockChunk::getChunkBasePosition(position)),
        relativePosition(BlockChunk::getRelativePosition(position))
    {
        updateChunk();
    }
public:
    /// World position of the block this iterator points at.
    PositionI getPosition() const
    {
        return relativePosition + chunkBasePosition;
    }
    /// The block this iterator points at.
    Block &get()
    {
        return chunk->at(relativePosition);
    }
    /// Moves the iterator by deltaPosition and returns *this.
    BlockIterator &operator +=(PositionI deltaPosition) // move to point to a new block
    {
        PositionI newRelativePosition = relativePosition + deltaPosition;
        if(BlockChunk::getRelativePosition(newRelativePosition) != newRelativePosition) // if the new position is outside of this chunk
        {
            relativePosition = BlockChunk::getRelativePosition(newRelativePosition);
            chunkBasePosition += BlockChunk::getChunkBasePosition(newRelativePosition);
            updateChunk();
        }
        else
        {
            relativePosition = newRelativePosition;
        }
        // The function is declared to return a reference; falling off the end
        // without returning one (as before) is undefined behaviour.
        return *this;
    }
    friend BlockIterator operator +(PositionI p, BlockIterator bi)
    {
        bi += p;
        return bi;
    }
    friend BlockIterator operator +(BlockIterator bi, PositionI p)
    {
        bi += p;
        return bi;
    }
};
/// Public face of the world: hands out iterators positioned on a block.
class BlockWorld final : public BlockWorldBase
{
public:
    /// Creates an iterator pointing at world position p.
    BlockIterator getIterator(PositionI p)
    {
        BlockIterator positioned(this, p);
        return positioned;
    }
};
If you leave the asserts in and access blocks only through BlockIterator, you shouldn't ever seg-fault:
/// Renders the block under `bi`: each of the six faces is rendered depending on
/// the facing-side query of the neighbour in that direction, then the centre
/// is rendered unconditionally.
void drawBlock(Renderer &renderer, BlockIterator bi)
{
    // Resolve all six axis neighbours before any rendering happens.
    BlockIterator westOf  = bi + PositionI(-1, 0, 0);
    BlockIterator eastOf  = bi + PositionI(1, 0, 0);
    BlockIterator below   = bi + PositionI(0, -1, 0);
    BlockIterator above   = bi + PositionI(0, 1, 0);
    BlockIterator behind  = bi + PositionI(0, 0, -1);
    BlockIterator inFront = bi + PositionI(0, 0, 1);

    if(westOf.get().isPXFaceBlocked())
    {
        bi.get().renderNXFace(renderer, bi);
    }
    if(eastOf.get().isNXFaceBlocked())
    {
        bi.get().renderPXFace(renderer, bi);
    }
    if(below.get().isPYFaceBlocked())
    {
        bi.get().renderNYFace(renderer, bi);
    }
    if(above.get().isNYFaceBlocked())
    {
        bi.get().renderPYFace(renderer, bi);
    }
    if(behind.get().isPZFaceBlocked())
    {
        bi.get().renderNZFace(renderer, bi);
    }
    if(inFront.get().isNZFaceBlocked())
    {
        bi.get().renderPZFace(renderer, bi);
    }
    bi.get().renderCenter(renderer, bi);
}

You don't show how the b_blocks variable is declared or initialized, but given that you're getting a segmentation fault, it's likely you declared it with a smaller size than your CHUNKSIZE.

Related

Fill Matrix in Spiral Form from center

I recently finished making an algorithm for a project I'm working on.
Briefly, a part of my project needs to fill a matrix, the requirements of how to do it are these:
- Fill the matrix in form of spiral, from the center.
- The size of the matrix must be dynamic, so the spiral can be large or small.
- Every two times a cell of the matrix is filled, //DO STUFF must be executed.
In the end, the code that I made works, it was my best effort and I am not able to optimize it more, it bothers me a bit having had to use so many ifs, and I was wondering if someone could take a look at my code to see if it is possible to optimize it further or some constructive comment (it works well, but it would be great if it was faster, since this algorithm will be executed several times in my project). Also so that other people can use it!
#include <stdio.h>
// 16-bit unsigned type used for every index and cell value below.
typedef unsigned short u16_t;
const u16_t size = 7; //<-- CHANGE HERE!!! just odd numbers and bigger than 3
// Number of cells fillArray() writes per burst (curr is compared against it).
const u16_t maxTimes = 2;
// The matrix being filled; zero-initialised.
u16_t array_cont[size][size] = { 0 };
// counter: next value to write (main() writes 1 and 2 itself, hence 3).
// curr: number of cells written in the current burst; main() resets it.
u16_t counter = 3, curr = 0;
// Current bounds of the spiral; they start at the centre index and centre + 1
// and are widened by fillArray() each time a side has been completed.
u16_t endColumn = (size - 1) / 2, endRow = endColumn;
u16_t startColumn = endColumn + 1, startRow = endColumn + 1;
// posLoop: side currently being filled (0 Top, 1 Right, 2 Bottom, 3 Left).
// buffer: index at which that side's loop starts or resumes.
// i: loop index shared by all sides.
u16_t posLoop = 2, buffer = startColumn, i = 0;
void fillArray() {
if (curr < maxTimes) {
if (posLoop == 0) { //Top
for (i = buffer; i <= startColumn && curr < maxTimes; i++, curr++)
array_cont[endRow][i] = counter++;
if (curr == maxTimes) {
if (i <= startColumn) {
buffer = i;
} else {
buffer = endRow;
startColumn++;
posLoop++;
}
} else {
buffer = endRow;
startColumn++;
posLoop++;
fillArray();
}
} else if (posLoop == 1) { //Right
for (i = buffer; i <= startRow && curr < maxTimes; i++, curr++)
array_cont[i][startColumn] = counter++;
if (curr == maxTimes) {
if (i <= startRow) {
buffer = i;
} else {
buffer = startColumn;
startRow++;
posLoop++;
}
} else {
buffer = startColumn;
startRow++;
posLoop++;
fillArray();
}
} else if (posLoop == 2) { //Bottom
for (i = buffer; i >= endColumn && curr < maxTimes; i--, curr++)
array_cont[startRow][i] = counter++;
if (curr == maxTimes) {
if (i >= endColumn) {
buffer = i;
} else {
buffer = startRow;
endColumn--;
posLoop++;
}
} else {
buffer = startRow;
endColumn--;
posLoop++;
fillArray();
}
} else if (posLoop == 3) { //Left
for (i = buffer; i >= endRow && curr < maxTimes; i--, curr++)
array_cont[i][endColumn] = counter++;
if (curr == maxTimes) {
if (i >= endRow) {
buffer = i;
} else {
buffer = endColumn;
endRow--;
posLoop = 0;
}
} else {
buffer = endColumn;
endRow--;
posLoop = 0;
fillArray();
}
}
}
}
int main(void) {
array_cont[endColumn][endColumn] = 1;
array_cont[endColumn][endColumn + 1] = 2;
//DO STUFF
u16_t max = ((size * size) - 1) / maxTimes;
for (u16_t j = 0; j < max; j++) {
fillArray();
curr = 0;
//DO STUFF
}
//Demostration
for (u16_t x = 0; x < size; x++) {
for (u16_t y = 0; y < size; y++)
printf("%-4d ", array_cont[x][y]);
printf("\n");
}
return 0;
}
Notice that the numbers along the diagonal (1, 9, 25, 49) are the squares of the odd numbers. That's an important clue, since it suggests that the 1 in the center of the matrix should be treated as the end of a spiral.
From the end of each spiral, the x,y coordinates should be adjusted up and to the right by 1. Then the next layer of the spiral can be constructed by moving down, left, up, and right by the same amount.
For example, starting from the position of the 1, move up and to the right (to the position of the 9), and then form a loop with the following procedure:
move down, and place the 2
move down, and place the 3
move left, and place the 4
move left, and place the 5
etc.
Thus the code looks something like this:
int size = 7;
int matrix[size][size];
int dy[] = { 1, 0, -1, 0 };
int dx[] = { 0, -1, 0, 1 };
int directionCount = 4;
int ringCount = (size - 1) / 2;
int y = ringCount;
int x = ringCount;
int repeatCount = 0;
int value = 1;
matrix[y][x] = value++;
for (int ring = 0; ring < ringCount; ring++)
{
y--;
x++;
repeatCount += 2;
for (int direction = 0; direction < directionCount; direction++)
for (int repeat = 0; repeat < repeatCount; repeat++)
{
y += dy[direction];
x += dx[direction];
matrix[y][x] = value++;
}
}
I have already seen many approaches for creating a spiral. All of them basically draw it by following a path.
BUT, you can also come up with an analytical calculation formula for a spiral.
So, no recursion or iterative solution by following a path or such. We can directly calculate the indices in the matrix, if we have the running number.
I will start with the spiral in mathematical positive direction (counter clockwise) in a cartesian coordinate system. We will concentrate on X and Y coordinates.
I made a short Excel and derived some formulas from that. Here is a short picture:
From the requirements we know that the matrix will be square. That makes things easier. It is a little trickier to get the matrix data symmetrical, but with some simple formulas derived from the pictures this is not really a problem.
And then we can calculate x and y coordinates with some simple statements. See the below example program with long variable names for better understanding. The code is made using some step by step approach to illustrate the implementation. Of course it can be made more compact easily. Anyway. Let's have a look.
#include <iostream>
#include <cmath>
#include <iomanip>
// Prints the spiral coordinates computed directly from the step number,
// for steps 0..80.
int main() {
    constexpr long stepCount = 81;
    for (long step = 0; step < stepCount; ++step) {
        // Nearest integer to sqrt(step), and that integer squared.
        const long root = std::lround(std::sqrt(step));
        const long square = root * root;
        const long gap = square - step;
        const long distance = std::abs(gap) - root;
        // 1 when the rounded root is odd, 0 otherwise; odd roots flip the sign.
        const long parity = root % 2;
        const long divisor = parity ? -2 : 2;
        const long x = (distance + gap - parity) / divisor;
        const long y = (-distance + gap - parity) / divisor;
        // Show output
        std::cout << "Step:" << std::setw(4) << step;
        std::cout << std::setw(3) << x << ' ' << std::setw(3) << y << '\n';
    }
}
So, you see that we really have an analytical solution. Given any number we can calculate the x and y coordinate using a formula. Cool.
Getting indices in a matrix is just adding some offset.
With that gained know how, we can now easily calculate the complete matrix. And, since there is no runtime activity needed at all, we can let the compiler do the work. We will simply use constexpr functions for everything.
Then the compiler will create this matrix at compile time. At runtime, nothing will happen.
Please see a very compact solution:
#include <iostream>
#include <iomanip>
#include <array>
// Edge length of the square matrix; the static_assert below requires it to be
// odd and non-zero.
constexpr size_t MatrixSize = 15u;
// Element type stored in the matrix and used for the coordinate arithmetic.
using MyType = long;
static_assert(MatrixSize > 0 && MatrixSize%2, "Matrix size must be odd and > 0");
// Integer half of the edge length; fillMatrix() uses it as the offset applied
// to the computed coordinates.
constexpr MyType MatrixHalf = MatrixSize / 2;
// Square matrix type: MatrixSize rows of MatrixSize elements each.
using Matrix = std::array<std::array<MyType, MatrixSize>, MatrixSize >;
// Some constexpr simple mathematical functions ------------------------------------------------------------------------------
// No need for <cmath>
// Absolute value usable in constant expressions.
constexpr MyType myAbs(MyType v) {
    if (v < 0) {
        return -v;
    }
    return v;
}
// Newton iteration for sqrt(x): c is the current estimate, p the previous one.
// Keeps replacing c with the average of c and x / c until two consecutive
// estimates compare equal, then returns that estimate.
constexpr double mySqrtRecursive(double x, double c, double p) {
    while (c != p) {
        p = c;
        c = 0.5 * (c + x / c);
    }
    return c;
}
// Square root of x rounded to the nearest integer: the Newton estimate is
// started at x itself, and 0.5 is added before the conversion truncates.
constexpr MyType mySqrt(MyType x) {
    return static_cast<MyType>(
        mySqrtRecursive(static_cast<double>(x), static_cast<double>(x), 0.0) + 0.5);
}
// Main constexpr function will fill the matrix with a spiral pattern during compile time -------------------------------------
// Builds the spiral matrix: for every running number n the target cell is
// computed directly from n (no path following) and n is stored there.
constexpr Matrix fillMatrix() {
    Matrix spiral{};
    constexpr MyType cellCount = static_cast<MyType>(MatrixSize * MatrixSize);
    for (MyType n = 0; n < cellCount; ++n) {
        const MyType root = mySqrt(n);          // sqrt(n) rounded to nearest
        const MyType square = root * root;
        const MyType gap = square - n;
        const MyType dist = myAbs(gap) - root;
        const MyType odd = root % 2;            // 1 for odd roots, else 0
        const MyType divisor = odd ? -2 : 2;
        const MyType dx = (dist + gap - odd) / divisor;
        const MyType dy = (-dist + gap - odd) / divisor;
        // Shift the centre-relative coordinates into matrix indices.
        const size_t col = static_cast<size_t>(MatrixHalf + dx);
        const size_t row = static_cast<size_t>(MatrixHalf - dy);
        spiral[row][col] = n;
    }
    return spiral;
}
// This is a compile time constant!
constexpr Matrix matrix = fillMatrix();
// All the above has been done during compile time! -----------------------------------------
// Prints the matrix that was filled at compile time, one row per line.
int main() {
    // Nothing to compute here: the spiral is already stored in `matrix`.
    for (const auto& line : matrix) {
        for (const auto& cell : line) {
            std::cout << std::setw(5) << cell << ' ';
        }
        std::cout << '\n';
    }
}
Different coordinate systems or other spiral direction can be adapted easily.
Happy coding.

BVH Tree Construction - Compiling gives Random mistakes

Thanks in advance for the help.
I'm trying to build a BVH Tree with Surface Area Heuristic, but everytime I compile my code it gives me random errors like:
"Access violation reading location"
"Run-Time Check Failure #2 - Stack around the variable 'x' was
corrupted."
"Stack overflow "
The errors happen in the BVH::buildSAH() function.
I have tried to find a solution for the whole day, to no avail. Could it be something from the std::partition function or from sending variables with pointers to a recursion?
I'm reading from the book "Physically Based Rendering: From Theory to Implementation
By Matt Pharr, Greg Humphreys"
It works for 2 primitives in the area, but thats trivial...
If you would like to clone: https://github.com/vkaytsanov/MortonCode-BVH-KD
My BVH.hpp:
#include <vector>
#include <cassert>
#include <algorithm>
#include "memory.hpp"
#include "Screen.hpp"
#include "Point3D.hpp"
#include "BoundBox.hpp"
#pragma once
// Coordinate axes; the numeric values are 0, 1 and 2.
enum Axis {
    X = 0,
    Y = 1,
    Z = 2
};
// Pairs a primitive with the Morton code computed for it; this is the element
// type sorted by radixSort().
struct MortonPrimitive{
// Index of the primitive (HLBVHBuild copies BVHPrimitiveInfo::primitiveNumber here).
int primitiveIndex;
// Morton code produced by EncodeMorton3() in HLBVHBuild.
uint32_t mortonCode;
};
// Build-time record for one primitive: its index, its bounding box and the
// centre point of that box.
struct BVHPrimitiveInfo {
    BVHPrimitiveInfo() {}
    // The centroid is the midpoint of the box, computed per component.
    BVHPrimitiveInfo(int primitiveNumber, const BoundBox& box)
        : primitiveNumber(primitiveNumber),
          box(box),
          centroid(Point3D(box.pMin.x * 0.5f + box.pMax.x * 0.5f,
                           box.pMin.y * 0.5f + box.pMax.y * 0.5f,
                           box.pMin.z * 0.5f + box.pMax.z * 0.5f)) {}
    // Default member initializer so the default constructor no longer leaves
    // this field with an indeterminate value.
    int primitiveNumber = 0;
    BoundBox box;
    Point3D centroid;
};
// Node of the pointer-based tree produced during construction. A node is
// either a leaf (nPrimitives > 0, no children) or an interior node
// (nPrimitives == 0, two children).
struct BVHNode {
    // Turns this node into a leaf covering n primitives starting at `first`.
    void InitLeaf(int first, int n, const BoundBox& b) {
        firstPrimOffset = first;
        nPrimitives = n;
        box = b;
        children[0] = children[1] = nullptr;
    }
    // Turns this node into an interior node over c0 and c1; its box becomes
    // the union of the children's boxes.
    void InitInterior(int axis, BVHNode* c0, BVHNode* c1) {
        // Both children are dereferenced below, so both must be non-null
        // (the previous `||` accepted one null child).
        assert(c0 != nullptr && c1 != nullptr);
        children[0] = c0;
        children[1] = c1;
        this->box = Union(c0->box, c1->box);
        splitAxis = axis;
        nPrimitives = 0;
    }
    BoundBox box;
    BVHNode* children[2];
    int splitAxis, firstPrimOffset, nPrimitives;
};
// Node of the flattened tree; flattenBVHTree() fills an array of these from
// the pointer-based BVHNode tree.
struct LinearBVHNode {
// Bounding box copied from the corresponding BVHNode.
BoundBox bounds;
// Which member is meaningful depends on nPrimitives (see below).
union {
int primitivesOffset; // leaf
int secondChildOffset; // interior
};
uint16_t nPrimitives; // 0 -> interior node
uint8_t axis; // interior node: xyz
uint8_t pad[1]; // ensure 32 byte total size
};
// One sub-tree scheduled by HLBVHBuild: a run of consecutive entries of the
// sorted Morton array together with the node storage reserved for it.
struct BVHLittleTree {
// Index of the run's first entry in the sorted Morton array.
int startIndex;
// Number of entries in the run.
int numPrimitives;
// Node storage for the run (HLBVHBuild allocates 2 * numPrimitives nodes from
// the arena); after emit() this is overwritten with the sub-tree's root.
BVHNode* nodes;
};
struct BVH {
BVH(std::vector<std::shared_ptr<Primitive>> p) : primitives(std::move(p)) {
std::vector<BVHPrimitiveInfo> BVHPrimitives;
BVHPrimitives.reserve(primitives.size());
for (int i = 0; i < primitives.size(); i++) {
BVHPrimitives.push_back({ i, primitives[i]->box });
}
MemoryArena arena(1024 * 1024);
int totalNodes = 0;
std::vector<std::shared_ptr<Primitive>> orderedPrimitives;
orderedPrimitives.reserve(primitives.size());
BVHNode* root;
root = HLBVHBuild(arena, BVHPrimitives, &totalNodes, orderedPrimitives);
primitives.swap(orderedPrimitives);
BVHPrimitives.resize(0);
printf("BVH created with %d nodes for %d "
"primitives (%.4f MB), arena allocated %.2f MB\n",
(int)totalNodes, (int)primitives.size(),
float(totalNodes * sizeof(LinearBVHNode)) /
(1024.f * 1024.f),
float(arena.TotalAllocated()) /
(1024.f * 1024.f));
assert(root != NULL);
nodes = AllocAligned<LinearBVHNode>(totalNodes);
int offset = 0;
flattenBVHTree(root, &offset);
}
~BVH() { FreeAligned(nodes); }
BVHNode* build(std::vector<MortonPrimitive>&, std::vector<Primitive>&);
BVHNode* HLBVHBuild(MemoryArena& arena, const std::vector<BVHPrimitiveInfo>& BVHPrimitives, int* totalNodes, std::vector<std::shared_ptr<Primitive>>& orderedPrims);
BVHNode* emit(BVHNode*& nodes, const std::vector<BVHPrimitiveInfo>& BVHPrimitives, MortonPrimitive* mortonPrimitives, std::vector<std::shared_ptr<Primitive>>&, int, int*, int*, int);
BVHNode* buildSAH(MemoryArena& arena, std::vector<BVHNode*>& treeRoots, int start, int end, int* total) const;
int flattenBVHTree(BVHNode*, int*);
std::vector<std::shared_ptr<Primitive>> primitives;
LinearBVHNode* nodes = nullptr;
int maxPrimsInNode = 1;
};
// Spreads the low 10 bits of x so that bit k ends up at bit 3 * k, leaving two
// zero bits between consecutive input bits. An input of exactly 1 << 10 is
// first reduced to 1023.
inline uint32_t LeftShift3(uint32_t x) {
    if (x == (1 << 10)) {
        --x;
    }
    // Each step ORs in a shifted copy and masks away the bits that do not
    // belong to the pattern at that stage.
    x = (x | (x << 16)) & 0x030000FFu;
    x = (x | (x << 8)) & 0x0300F00Fu;
    x = (x | (x << 4)) & 0x030C30C3u;
    x = (x | (x << 2)) & 0x09249249u;
    return x;
}
// Interleaves the spread-out bits of the three coordinates of p into a single
// code: x contributes bits 0, 3, 6, ..., y bits 1, 4, 7, ..., z bits 2, 5, 8, ...
uint32_t EncodeMorton3(const Point3D& p) {
    const uint32_t zBits = LeftShift3(p.z) << 2;
    const uint32_t yBits = LeftShift3(p.y) << 1;
    const uint32_t xBits = LeftShift3(p.x);
    return zBits | yBits | xBits;
}
// Returns 1 when number and mask have at least one set bit in common, else 0.
short bitValue(uint32_t& number, uint32_t& mask) {
    if ((number & mask) != 0) {
        return 1;
    }
    return 0;
}
static void radixSort(std::vector<MortonPrimitive>* v)
{
std::vector<MortonPrimitive> tempVector(v->size());
const int bitsPerPass = 6;
const int nBits = 30;
static_assert((nBits % bitsPerPass) == 0,
"Radix sort bitsPerPass must evenly divide nBits");
const int nPasses = nBits / bitsPerPass;
for (int pass = 0; pass < nPasses; ++pass) {
// Perform one pass of radix sort, sorting _bitsPerPass_ bits
int lowBit = pass * bitsPerPass;
// Set in and out vector pointers for radix sort pass
std::vector<MortonPrimitive>& in = (pass & 1) ? tempVector : *v;
std::vector<MortonPrimitive>& out = (pass & 1) ? *v : tempVector;
// Count number of zero bits in array for current radix sort bit
const int nBuckets = 1 << bitsPerPass;
int bucketCount[nBuckets] = { 0 };
const int bitMask = (1 << bitsPerPass) - 1;
for (const MortonPrimitive& mp : in) {
int bucket = (mp.mortonCode >> lowBit) & bitMask;
++bucketCount[bucket];
}
// Compute starting index in output array for each bucket
int outIndex[nBuckets];
outIndex[0] = 0;
for (int i = 1; i < nBuckets; ++i)
outIndex[i] = outIndex[i - 1] + bucketCount[i - 1];
// Store sorted values in output array
for (const MortonPrimitive& mp : in) {
int bucket = (mp.mortonCode >> lowBit) & bitMask;
out[outIndex[bucket]++] = mp;
}
}
// Copy final result from _tempVector_, if needed
if (nPasses & 1) std::swap(*v, tempVector);
}
//BVHNode* BVH::build(std::vector<MortonPrimitive>& mortonPrimitives, std::vector<Primitive>& prims) {
//
//
//}
// Primitive count and bounds of one bucket.
// NOTE(review): nothing in the visible part of this file refers to this type;
// buildSAH() declares its own local Buckets struct -- confirm whether this one
// is still needed.
struct BucketInfo {
int count = 0;
BoundBox bounds;
};
// Builds the pointer-based tree:
//   1. bound all primitive centroids,
//   2. compute and sort a Morton code per primitive,
//   3. cut the sorted array into runs sharing the masked high bits and emit()
//      one sub-tree per run,
//   4. join the sub-tree roots with buildSAH().
// *totalNodes is increased by the number of nodes created; orderedPrims
// receives the primitives in leaf order. Returns the root of the tree.
BVHNode* BVH::HLBVHBuild(MemoryArena& arena, const std::vector<BVHPrimitiveInfo>& BVHPrimitives, int* totalNodes, std::vector<std::shared_ptr<Primitive>>& orderedPrims) {
    BoundBox box;
    for (const BVHPrimitiveInfo& pi : BVHPrimitives) {
        box = box.Union(box, pi.centroid); // maybe it should be UNION #TODO
    }
    const int primitiveCount = static_cast<int>(BVHPrimitives.size());
    std::vector<MortonPrimitive> mortonPrims(BVHPrimitives.size());
    for (int i = 0; i < primitiveCount; i++) {
        const int mortonBits = 10;
        const int mortonScale = 1 << mortonBits;
        mortonPrims[i].primitiveIndex = BVHPrimitives[i].primitiveNumber;
        // Centroid position relative to the centroid bounds, scaled to the
        // 10-bit range used per axis by the Morton code.
        Point3D p = box.offset(BVHPrimitives[i].centroid);
        p.x = p.x * mortonScale;
        p.y = p.y * mortonScale;
        p.z = p.z * mortonScale;
        mortonPrims[i].mortonCode = EncodeMorton3(p);
    }
    radixSort(&mortonPrims);
    std::vector<BVHLittleTree> treesToBuild;
    uint32_t mask = 0b00111111111111000000000000000000; // first 12 bits describe the position of the primitive
    for (int start = 0, end = 1; end <= primitiveCount; end++) {
        if (end == primitiveCount || ((mortonPrims[start].mortonCode & mask) != (mortonPrims[end].mortonCode & mask))) {
            int n = end - start;
            int maxNodes = 2 * n;
            BVHNode* nodes = arena.Alloc<BVHNode>(maxNodes, false);
            treesToBuild.push_back({ start, n, nodes });
            start = end;
        }
    }
    int orderedPrimsOffset = 0;
    orderedPrims.resize(primitives.size());
    // emit() increments this counter for every node it creates, across all
    // sub-trees, so after the loop it holds the total.
    int nodesCreated = 0;
    int firstBitIndex = 29 - 12;
    for (std::size_t i = 0; i < treesToBuild.size(); i++) {
        treesToBuild[i].nodes = BVH::emit(treesToBuild[i].nodes, BVHPrimitives, &mortonPrims[treesToBuild[i].startIndex], orderedPrims, treesToBuild[i].numPrimitives, &nodesCreated, &orderedPrimsOffset, firstBitIndex);
    }
    // Add the emitted nodes exactly once, to the pointed-to counter. The old
    // code added the running total on every iteration (double counting) and
    // then executed `totalNodes += nodesCreated`, which moved the pointer
    // itself; buildSAH() then incremented whatever memory it pointed at.
    *totalNodes += nodesCreated;
    std::vector<BVHNode*> finishedTrees;
    finishedTrees.reserve(treesToBuild.size());
    for (BVHLittleTree& tr : treesToBuild) {
        finishedTrees.emplace_back(tr.nodes);
    }
    return buildSAH(arena, finishedTrees, 0, static_cast<int>(finishedTrees.size()), totalNodes);
}
// Recursively builds the sub-tree for `primitivesCount` consecutive entries of
// the sorted Morton array, splitting on Morton bit `bitIndex` and then on the
// lower bits.
//   nodes               next free node of the pre-allocated storage; advanced
//                       for every node taken
//   totalNodes          incremented for every node created
//   orderedPrimsOffset  next free slot in orderedPrimitives; advanced by the
//                       number of primitives of every leaf created
// Returns the root of the sub-tree.
BVHNode* BVH::emit(BVHNode*& nodes, const std::vector<BVHPrimitiveInfo>& BVHPrimitive, MortonPrimitive* mortonPrimitives, std::vector<std::shared_ptr<Primitive>>& orderedPrimitives, int primitivesCount, int* totalNodes, int* orderedPrimsOffset, int bitIndex) {
    if (bitIndex == -1 || primitivesCount < maxPrimsInNode) {
        // Leaf: claim a contiguous range of orderedPrimitives and copy the
        // primitives into it.
        (*totalNodes)++;
        BVHNode* tmp = nodes++;
        BoundBox box;
        // Reserve the range so that the next leaf does not write over it. The
        // offset was previously never advanced and every leaf recorded 0 as
        // its first primitive.
        const int firstPrimOffset = *orderedPrimsOffset;
        *orderedPrimsOffset += primitivesCount;
        for (int i = 0; i < primitivesCount; i++) {
            int index = mortonPrimitives[i].primitiveIndex;
            orderedPrimitives[firstPrimOffset + i] = primitives[index];
            box = box.Union(box, BVHPrimitive[index].box);
        }
        tmp->InitLeaf(firstPrimOffset, primitivesCount, box);
        return tmp;
    }
    else {
        int mask = 1 << bitIndex;
        if ((mortonPrimitives[0].mortonCode & mask) == (mortonPrimitives[primitivesCount - 1].mortonCode & mask)){ // Next tree if nothing to split for this bit
            return emit(nodes, BVHPrimitive, mortonPrimitives, orderedPrimitives, primitivesCount, totalNodes, orderedPrimsOffset, bitIndex - 1);
        }
        // Binary search for the first entry whose masked bit differs from the
        // first entry's; the array is sorted, so that is the split point.
        int start = 0;
        int end = primitivesCount - 1;
        while (start + 1 != end) {
            int mid = (end - start) / 2 + start;
            if ((mortonPrimitives[start].mortonCode & mask) == (mortonPrimitives[mid].mortonCode & mask)) {
                start = mid;
            }
            else {
                end = mid;
            }
        }
        int split = end;
        (*totalNodes)++;
        BVHNode* tmp = nodes++;
        BVHNode* lbvh[2];
        lbvh[0] = emit(nodes, BVHPrimitive, mortonPrimitives, orderedPrimitives, split, totalNodes, orderedPrimsOffset, bitIndex - 1);
        lbvh[1] = emit(nodes, BVHPrimitive, &mortonPrimitives[split], orderedPrimitives, primitivesCount - split, totalNodes, orderedPrimsOffset, bitIndex - 1);
        int axis = bitIndex % 3;
        tmp->InitInterior(axis, lbvh[0], lbvh[1]);
        return tmp;
    }
}
// Joins the sub-tree roots treeRoots[start, end) into one tree. The split
// position is chosen by evaluating a surface-area cost over 12 buckets along
// the axis on which the roots' centroids are spread the most.
//   total  incremented for every interior node created here
// Returns the single root for one element, nullptr for an empty range, and a
// newly allocated interior node otherwise. The elements of
// treeRoots[start, end) may be reordered.
BVHNode* BVH::buildSAH(MemoryArena& arena, std::vector<BVHNode*>& treeRoots, int start, int end, int* total) const {
    const int nodesCount = end - start;
    if (nodesCount <= 0) {
        // Empty range: there is nothing to join (and nothing to recurse on).
        return nullptr;
    }
    if (nodesCount == 1) {
        return treeRoots[start];
    }
    (*total)++;
    BVHNode* node = arena.Alloc<BVHNode>();
    BoundBox box;
    for (int i = start; i < end; i++) {
        box = Union(box, treeRoots[i]->box);
    }
    BoundBox centroidBox;
    for (int i = start; i < end; i++) {
        Point3D centroid = Point3D((treeRoots[i]->box.pMin.x + treeRoots[i]->box.pMax.x) * 0.5f, (treeRoots[i]->box.pMin.y + treeRoots[i]->box.pMax.y) * 0.5f, (treeRoots[i]->box.pMin.z + treeRoots[i]->box.pMax.z) * 0.5f);
        centroidBox = Union(centroidBox, centroid);
    }
    const int dimension = centroidBox.MaximumExtent();
    const float axisMin = centroidBox.pMin[dimension];
    const float axisExtent = centroidBox.pMax[dimension] - axisMin;

    // Fallback split: the middle of the range. It is used when the centroids
    // do not spread along the chosen axis (the bucket computation would divide
    // by zero) or when the SAH partition leaves one side empty (recursing on
    // the unchanged range would never terminate).
    int mid = start + nodesCount / 2;

    if (axisExtent > 0) {
        const int nBuckets = 12;
        // Bucket of a node along the split axis, clamped to [0, nBuckets - 1].
        // The same function is used for counting and for partitioning so that
        // both always agree.
        auto bucketOf = [=](const BVHNode* n) {
            const float c = n->box.pMin[dimension] * 0.5f + n->box.pMax[dimension] * 0.5f;
            int b = static_cast<int>(nBuckets * ((c - axisMin) / axisExtent));
            if (b >= nBuckets) b = nBuckets - 1;
            if (b < 0) b = 0;
            return b;
        };
        struct Buckets {
            int count = 0;
            BoundBox box;
        };
        Buckets buckets[nBuckets];
        for (int i = start; i < end; i++) {
            const int b = bucketOf(treeRoots[i]);
            buckets[b].count++;
            buckets[b].box = Union(buckets[b].box, treeRoots[i]->box);
        }
        // Cost of splitting after bucket i, for every possible i.
        float cost[nBuckets - 1];
        for (int i = 0; i < nBuckets - 1; i++) {
            BoundBox b0, b1;
            int count0 = 0, count1 = 0;
            for (int j = 0; j <= i; j++) {
                b0 = Union(b0, buckets[j].box);
                count0 += buckets[j].count;
            }
            for (int j = i + 1; j < nBuckets; j++) {
                b1 = Union(b1, buckets[j].box);
                count1 += buckets[j].count;
            }
            cost[i] = (.125f + (count0 * b0.surfaceArea() + count1 * b1.surfaceArea())) / box.surfaceArea();
        }
        double minCost = cost[0];
        int minCostSplitBucket = 0;
        for (int i = 1; i < nBuckets - 1; ++i) {
            if (cost[i] < minCost) {
                minCost = cost[i];
                minCostSplitBucket = i;
            }
        }
        BVHNode** pmid = std::partition(&treeRoots[start], &treeRoots[end - 1] + 1, [=](const BVHNode* candidate) {
            return bucketOf(candidate) <= minCostSplitBucket;
        });
        const int sahMid = static_cast<int>(pmid - &treeRoots[0]);
        if (sahMid > start && sahMid < end) {
            mid = sahMid;
        }
    }
    std::cout << dimension << std::endl;
    node->InitInterior(dimension, this->buildSAH(arena, treeRoots, start, mid, total), this->buildSAH(arena, treeRoots, mid, end, total));
    return node;
}
// Copies the tree rooted at `node` into the `nodes` array in depth-first
// order, starting at index *offset and advancing *offset past every node
// written. Returns the index at which `node` itself was stored.
int BVH::flattenBVHTree(BVHNode* node, int* offset) {
    const int myOffset = *offset;
    ++(*offset);
    LinearBVHNode& flat = nodes[myOffset];
    flat.bounds = node->box;
    if (node->nPrimitives <= 0) {
        // Interior node: the first child follows directly, only the second
        // child's index has to be stored.
        flat.axis = node->splitAxis;
        flat.nPrimitives = 0;
        flattenBVHTree(node->children[0], offset);
        flat.secondChildOffset = flattenBVHTree(node->children[1], offset);
    }
    else {
        // Leaf node.
        flat.primitivesOffset = node->firstPrimOffset;
        flat.nPrimitives = node->nPrimitives;
    }
    return myOffset;
}
My Point3D.hpp
#include <cstdint>
#pragma once
// Point in 3D space with float coordinates.
//
// Changes compared to the previous version:
//  - the constructor takes floats; it used to take uint32_t, which truncated
//    every fractional coordinate and is undefined for negative values;
//  - operator[] returns float instead of int for the same reason;
//  - operator+ and operator- return a new point and no longer modify the
//    left-hand operand;
//  - operator< / operator> compare the lengths of both points (the left-hand
//    length used the right-hand z by mistake).
struct Point3D {
    float x;
    float y;
    float z;
    Point3D(float, float, float);
    Point3D();
    // Component access: 0 -> x, 1 -> y, any other index -> z.
    float operator[](int);
    float operator[](int) const;
    // Adds / subtracts the same amount on every component.
    Point3D operator+(int) const;
    Point3D operator-(int) const;
    // Component-wise difference.
    Point3D operator-(const Point3D&) const;
};
inline Point3D::Point3D() {
    x = 0;
    y = 0;
    z = 0;
}
inline Point3D::Point3D(float x, float y, float z) {
    this->x = x;
    this->y = y;
    this->z = z;
}
// Orders points by distance from the origin. The difference of the two
// distances is truncated to an integer, so distances less than 1 apart count
// as equal; ties are broken by the (integer-truncated) squared x, then y,
// then z component.
inline bool operator<(Point3D a, Point3D b) {
    const uint32_t xSquare = static_cast<uint32_t>(a.x * a.x);
    const uint32_t ySquare = static_cast<uint32_t>(a.y * a.y);
    const uint32_t zSquare = static_cast<uint32_t>(a.z * a.z);
    const uint32_t x2Square = static_cast<uint32_t>(b.x * b.x);
    const uint32_t y2Square = static_cast<uint32_t>(b.y * b.y);
    const uint32_t z2Square = static_cast<uint32_t>(b.z * b.z);
    const int64_t sum = static_cast<int64_t>(
        std::sqrt(xSquare + ySquare + zSquare) - std::sqrt(x2Square + y2Square + z2Square));
    if (sum != 0) {
        return sum < 0;
    }
    if (xSquare != x2Square) {
        return xSquare < x2Square;
    }
    if (ySquare != y2Square) {
        return ySquare < y2Square;
    }
    return zSquare < z2Square;
}
// Mirror image of operator<.
inline bool operator>(Point3D a, Point3D b) {
    return b < a;
}
inline float Point3D::operator[](int i) {
    if (i == 0) return x;
    if (i == 1) return y;
    return z;
}
inline Point3D Point3D::operator+(int i) const {
    return Point3D(x + i, y + i, z + i);
}
inline Point3D Point3D::operator-(const int i) const {
    return Point3D(x - i, y - i, z - i);
}
inline Point3D Point3D::operator-(const Point3D& p) const {
    return Point3D(x - p.x, y - p.y, z - p.z);
}
inline float Point3D::operator[](const int i) const {
    if (i == 0) return x;
    if (i == 1) return y;
    return z;
}
My BoundBox.hpp
#include "Point3D.hpp"
#include "Vector3D.hpp"
#pragma once
// Axis-aligned bounding box described by its minimum and maximum corner.
struct BoundBox {
Point3D pMin;
Point3D pMax;
// Box containing exactly one point.
BoundBox(Point3D);
// Box spanning two points given in any order.
BoundBox(Point3D, Point3D);
// Default box; see the definition for the initial corner values.
BoundBox();
// NOTE(review): no definition of setBounds is visible in this file.
void setBounds(BoundBox);
// NOTE(review): no definition of this one-argument Union is visible in this file.
void Union(BoundBox);
// Box grown to contain a point / union of two boxes.
BoundBox Union(BoundBox&, Point3D&);
BoundBox Union(BoundBox, BoundBox);
// Union that is only performed when the boxes overlap on all three axes.
BoundBox unite(BoundBox, BoundBox);
BoundBox unite(BoundBox);
// Position of a point relative to pMin, divided by the box extent per axis.
const Point3D offset(const Point3D&);
// NOTE(review): no definition of diagonal is visible in this file.
Point3D diagonal();
// Index (0, 1 or 2) of the axis along which the box is longest.
const int MaximumExtent();
float surfaceArea();
};
// Default box: pMin starts at (800, 600, 300) and pMax at the origin.
// NOTE(review): presumably an "inverted" box that any later union replaces,
// sized after the screen -- confirm.
BoundBox::BoundBox()
    : pMin(Point3D(800, 600, 300)),
      pMax(Point3D(0, 0, 0)) {
}
// Degenerate box whose two corners are both p.
BoundBox::BoundBox(Point3D p)
    : pMin(p),
      pMax(p) {
}
// Box spanning p1 and p2: per axis the smaller coordinate goes into pMin and
// the larger one into pMax, so the points may be given in any order.
BoundBox::BoundBox(Point3D p1, Point3D p2)
    : pMin(Point3D(std::min(p1.x, p2.x), std::min(p1.y, p2.y), std::min(p1.z, p2.z))),
      pMax(Point3D(std::max(p1.x, p2.x), std::max(p1.y, p2.y), std::max(p1.z, p2.z))) {
}
// Returns `box` grown, per component, just enough to contain point p.
BoundBox BoundBox::Union(BoundBox& box, Point3D& p) {
    const Point3D lower = Point3D(std::min(box.pMin.x, p.x),
                                  std::min(box.pMin.y, p.y),
                                  std::min(box.pMin.z, p.z));
    const Point3D upper = Point3D(std::max(box.pMax.x, p.x),
                                  std::max(box.pMax.y, p.y),
                                  std::max(box.pMax.z, p.z));
    BoundBox grown;
    grown.pMin = lower;
    grown.pMax = upper;
    return grown;
}
// Returns the smallest box containing both box1 and box2.
// The corners are combined per component. The previous code applied
// std::min / std::max to whole Point3D values, which selects one of the two
// corner points through Point3D's operator< (an ordering by distance from the
// origin) and therefore can return a box that does not contain both inputs.
BoundBox BoundBox::Union(BoundBox box1, BoundBox box2) {
    BoundBox newBox;
    newBox.pMin = Point3D(std::min(box1.pMin.x, box2.pMin.x),
                          std::min(box1.pMin.y, box2.pMin.y),
                          std::min(box1.pMin.z, box2.pMin.z));
    newBox.pMax = Point3D(std::max(box1.pMax.x, box2.pMax.x),
                          std::max(box1.pMax.y, box2.pMax.y),
                          std::max(box1.pMax.z, box2.pMax.z));
    return newBox;
}
// Free-function form: returns the smallest box containing both box1 and box2.
// The corners are combined per component. The previous code applied
// std::min / std::max to whole Point3D values, which selects one of the two
// corner points through Point3D's operator< (an ordering by distance from the
// origin) and therefore can return a box that does not contain both inputs.
BoundBox Union(BoundBox box1, BoundBox box2) {
    BoundBox newBox;
    newBox.pMin = Point3D(std::min(box1.pMin.x, box2.pMin.x),
                          std::min(box1.pMin.y, box2.pMin.y),
                          std::min(box1.pMin.z, box2.pMin.z));
    newBox.pMax = Point3D(std::max(box1.pMax.x, box2.pMax.x),
                          std::max(box1.pMax.y, box2.pMax.y),
                          std::max(box1.pMax.z, box2.pMax.z));
    return newBox;
}
// Returns the union of b1 and b2 when the two boxes overlap on all three axes.
// When they do not overlap, b1 is returned unchanged. The function previously
// reached its end without returning a value in that case, which is undefined
// behaviour; returning the first box mirrors the single-argument overload,
// which returns the receiver.
BoundBox BoundBox::unite(BoundBox b1, BoundBox b2) {
    bool x = (b1.pMax.x >= b2.pMin.x) && (b1.pMin.x <= b2.pMax.x);
    bool y = (b1.pMax.y >= b2.pMin.y) && (b1.pMin.y <= b2.pMax.y);
    bool z = (b1.pMax.z >= b2.pMin.z) && (b1.pMin.z <= b2.pMax.z);
    if (x && y && z) {
        return Union(b1, b2);
    }
    return b1;
}
// Returns the union of this box and b2 when they overlap on all three axes,
// otherwise a copy of this box.
BoundBox BoundBox::unite(BoundBox b2) {
    const bool overlapX = (pMax.x >= b2.pMin.x) && (pMin.x <= b2.pMax.x);
    const bool overlapY = (pMax.y >= b2.pMin.y) && (pMin.y <= b2.pMax.y);
    const bool overlapZ = (pMax.z >= b2.pMin.z) && (pMin.z <= b2.pMax.z);
    if (!overlapX || !overlapY || !overlapZ) {
        return *this;
    }
    return Union(*this, b2);
}
const int BoundBox::MaximumExtent() {
Point3D d = Point3D(this->pMax.x - this->pMin.x, this->pMax.y - this->pMin.y, this->pMax.z - this->pMin.z); // diagonal
if (d.x > d.y && d.x > d.z) {
return 0;
}
else if (d.y > d.z) {
return 1;
}
else {
return 2;
}
}
float BoundBox::surfaceArea() {
Point3D d = Point3D(this->pMax.x - this->pMin.x, this->pMax.y - this->pMin.y, this->pMax.z - this->pMin.z); // diagonal
return 2 * (d.x * d.y + d.x * d.z + d.y * d.z);
}
// Position of p relative to the box: p - pMin, with each coordinate divided by
// the box's size on that axis. Axes on which the box has no positive size
// (pMax <= pMin) are left as the plain difference to avoid dividing by zero.
const Point3D BoundBox::offset(const Point3D& p) {
    Point3D relative = Point3D(p.x - pMin.x, p.y - pMin.y, p.z - pMin.z);

    if (pMax.x > pMin.x) {
        relative.x /= pMax.x - pMin.x;
    }
    if (pMax.y > pMin.y) {
        relative.y /= pMax.y - pMin.y;
    }
    if (pMax.z > pMin.z) {
        relative.z /= pMax.z - pMin.z;
    }
    return relative;
}
My memory.hpp
#include <list>
#include <cstddef>
#include <algorithm>
#include <malloc.h>
#include <stdlib.h>
#pragma once
// Placement-new helper: expands to `new (arena.Alloc(sizeof(Type))) Type`, so
// constructor arguments are written right after the macro call, e.g.
// `ARENA_ALLOC(arena, Foo)(a, b)`. The storage comes from the arena's Alloc().
#define ARENA_ALLOC(arena, Type) new ((arena).Alloc(sizeof(Type))) Type
// Untyped aligned allocation of `size` bytes; pair every result with FreeAligned().
void* AllocAligned(size_t size);

// Typed convenience overload: raw storage for `count` objects of type T.
// No constructors are run on the returned memory.
template <typename T>
T* AllocAligned(size_t count) {
    void* raw = AllocAligned(count * sizeof(T));
    return static_cast<T*>(raw);
}

// Releases memory obtained from AllocAligned().
void FreeAligned(void*);
// Region-based allocator: hands out chunks carved from large aligned blocks and
// releases everything at once (Reset() recycles the blocks, the destructor frees
// them). Individual allocations are never freed on their own, and the arena only
// releases raw memory — it does not run destructors of objects placed in it.
// Copying is disabled (see the deleted copy operations below).
class
#ifdef PBRT_HAVE_ALIGNAS
alignas(PBRT_L1_CACHE_LINE_SIZE)
#endif // PBRT_HAVE_ALIGNAS
MemoryArena {
public:
// MemoryArena Public Methods
// blockSize is the minimum size in bytes of each backing block (default 262144 = 256 KiB).
MemoryArena(size_t blockSize = 262144) : blockSize(blockSize) {}
// Frees the current block and every block on both lists.
// FreeAligned() ignores null, so an arena that never allocated is fine.
~MemoryArena() {
FreeAligned(currentBlock);
for (auto& block : usedBlocks) FreeAligned(block.second);
for (auto& block : availableBlocks) FreeAligned(block.second);
}
// Returns nBytes of storage (rounded up to a multiple of `align`).
// The memory is uninitialized unless it comes from a freshly constructed object.
void* Alloc(size_t nBytes) {
// Round up _nBytes_ to minimum machine alignment
#if __GNUC__ == 4 && __GNUC_MINOR__ < 9
// gcc bug: max_align_t wasn't in std:: until 4.9.0
const int align = alignof(::max_align_t);
#elif !defined(PBRT_HAVE_ALIGNOF)
const int align = 16;
#else
const int align = alignof(std::max_align_t);
#endif
#ifdef PBRT_HAVE_CONSTEXPR
static_assert(IsPowerOf2(align), "Minimum alignment not a power of two");
#endif
// Power-of-two round-up: add (align - 1), then clear the low bits.
nBytes = (nBytes + align - 1) & ~(align - 1);
// Not enough room left in the current block: retire it and pick another.
if (currentBlockPos + nBytes > currentAllocSize) {
// Add current block to _usedBlocks_ list
if (currentBlock) {
usedBlocks.push_back(
std::make_pair(currentAllocSize, currentBlock));
currentBlock = nullptr;
currentAllocSize = 0;
}
// Get new block of memory for _MemoryArena_
// Try to get memory block from _availableBlocks_
// (first-fit: the first recycled block that is large enough is taken)
for (auto iter = availableBlocks.begin();
iter != availableBlocks.end(); ++iter) {
if (iter->first >= nBytes) {
currentAllocSize = iter->first;
currentBlock = iter->second;
availableBlocks.erase(iter);
break;
}
}
// Nothing reusable: allocate a fresh block, at least blockSize bytes
// and large enough for this single request.
if (!currentBlock) {
currentAllocSize = std::max(nBytes, blockSize);
currentBlock = AllocAligned<uint8_t>(currentAllocSize);
}
currentBlockPos = 0;
}
// Bump-allocate from the current block.
void* ret = currentBlock + currentBlockPos;
currentBlockPos += nBytes;
return ret;
}
// Typed allocation of n objects of T. When runConstructor is true each
// element is default-constructed in place; otherwise the storage is returned raw.
template <typename T>
T* Alloc(size_t n = 1, bool runConstructor = true) {
T* ret = (T*)Alloc(n * sizeof(T));
if (runConstructor)
for (size_t i = 0; i < n; ++i) new (&ret[i]) T();
return ret;
}
// Makes all memory reusable without freeing it: the current block is rewound
// and every used block is moved to the available list. Pointers previously
// returned by Alloc() must no longer be used afterwards.
void Reset() {
currentBlockPos = 0;
availableBlocks.splice(availableBlocks.begin(), usedBlocks);
}
// Total bytes held in backing blocks (current + used + available),
// regardless of how much of them has been handed out.
size_t TotalAllocated() const {
size_t total = currentAllocSize;
for (const auto& alloc : usedBlocks) total += alloc.first;
for (const auto& alloc : availableBlocks) total += alloc.first;
return total;
}
private:
// Non-copyable: the arena owns its blocks.
MemoryArena(const MemoryArena&) = delete;
MemoryArena & operator=(const MemoryArena&) = delete;
// MemoryArena Private Data
const size_t blockSize;                          // minimum size of a newly allocated block
size_t currentBlockPos = 0, currentAllocSize = 0; // bump offset into / size of currentBlock
uint8_t * currentBlock = nullptr;                // block currently being carved up (may be null)
// (size, pointer) pairs: blocks that are full, and blocks recycled by Reset().
std::list<std::pair<size_t, uint8_t*>> usedBlocks, availableBlocks;
};
// 2-D array of T stored in square tiles of (1 << logBlockSize) elements per
// side, so that elements close together in (u, v) are close together in memory.
//
// Both dimensions are padded up to a whole number of tiles; every padded
// element is constructed in the constructor and destroyed in the destructor.
//
// NOTE(review): the class owns `data` through a raw pointer but keeps the
// implicitly generated copy operations, so copying an instance would free the
// storage twice. Callers must not copy BlockedArray objects.
template <typename T, int logBlockSize>
class BlockedArray {
  public:
    // BlockedArray Public Methods

    // Creates a uRes x vRes array of default-constructed elements. When `d` is
    // non-null it must point to uRes * vRes values in row-major order
    // (index v * uRes + u), which are copied into the tiled layout.
    BlockedArray(int uRes, int vRes, const T* d = nullptr)
        : uRes(uRes), vRes(vRes), uBlocks(RoundUp(uRes) >> logBlockSize) {
        const int nAlloc = PaddedCount();
        data = AllocAligned<T>(nAlloc);
        for (int i = 0; i < nAlloc; ++i) new (&data[i]) T();
        if (d)
            for (int v = 0; v < vRes; ++v)
                for (int u = 0; u < uRes; ++u) (*this)(u, v) = d[v * uRes + u];
    }

    // Tile edge length in elements.
    const int BlockSize() const { return 1 << logBlockSize; }

    // Rounds x up to the next multiple of BlockSize().
    int RoundUp(int x) const {
        return (x + BlockSize() - 1) & ~(BlockSize() - 1);
    }

    int uSize() const { return uRes; }
    int vSize() const { return vRes; }

    // Destroys every element the constructor created. The padded count is
    // used (not uRes * vRes): the constructor placement-news
    // RoundUp(uRes) * RoundUp(vRes) objects, and stopping early would skip
    // destructors whenever a dimension is not a multiple of the tile size.
    ~BlockedArray() {
        const int nAlloc = PaddedCount();
        for (int i = 0; i < nAlloc; ++i) data[i].~T();
        FreeAligned(data);
    }

    // Tile index / offset inside the tile for one coordinate.
    int Block(int a) const { return a >> logBlockSize; }
    int Offset(int a) const { return (a & (BlockSize() - 1)); }

    // Element access; (u, v) must lie inside the array (not range-checked).
    T& operator()(int u, int v) { return data[LinearIndex(u, v)]; }
    const T& operator()(int u, int v) const { return data[LinearIndex(u, v)]; }

    // Copies the logical uRes x vRes contents to `a` in row-major order;
    // `a` must have room for uRes * vRes elements.
    void GetLinearArray(T* a) const {
        for (int v = 0; v < vRes; ++v)
            for (int u = 0; u < uRes; ++u) *a++ = (*this)(u, v);
    }

  private:
    // Number of elements actually allocated (both dimensions tile-padded).
    int PaddedCount() const { return RoundUp(uRes) * RoundUp(vRes); }

    // Maps (u, v) to its position in the tiled storage: start of the tile,
    // plus the row-major offset inside that tile.
    int LinearIndex(int u, int v) const {
        const int bu = Block(u), bv = Block(v);
        const int ou = Offset(u), ov = Offset(v);
        int offset = BlockSize() * BlockSize() * (uBlocks * bv + bu);
        offset += BlockSize() * ov + ou;
        return offset;
    }

    // BlockedArray Private Data
    T* data;
    const int uRes, vRes, uBlocks;
};
// Allocates `size` bytes aligned to a 32-byte boundary.
// Returns nullptr when the allocation fails. Release with FreeAligned() only.
//
// Declared inline because this definition lives in a header: without it, every
// translation unit including the header would emit its own copy and the link
// would fail with duplicate symbols.
// _aligned_malloc only exists on Windows toolchains; elsewhere the POSIX
// equivalent from <stdlib.h> is used.
inline void* AllocAligned(size_t size) {
#if defined(_WIN32)
    return _aligned_malloc(size, 32);
#else
    void* ptr = nullptr;
    if (posix_memalign(&ptr, 32, size) != 0) {
        ptr = nullptr;  // posix_memalign leaves ptr unspecified on failure
    }
    return ptr;
#endif
}

// Releases memory obtained from AllocAligned(). Passing nullptr is a no-op.
// The deallocator must match the allocator chosen above for the platform.
inline void FreeAligned(void* ptr) {
    if (!ptr) return;
#if defined(_WIN32)
    _aligned_free(ptr);
#else
    free(ptr);
#endif
}
and My Source.cpp
#include <iostream>
#include <vector>
#include <chrono>
#include "Point3D.hpp"
#include "Screen.hpp"
#include "BVH.hpp"
#define N 150
// Benchmark driver: generates N points on an 800x600x300 screen, wraps each one
// in a primitive, builds a BVH over them and prints the elapsed wall time.
int main() {
    using Clock = std::chrono::high_resolution_clock;
    const Clock::time_point begin = Clock::now();

    Screen* screen = new Screen(800, 600, 300);
    screen->generatePoints(N);

    // One primitive per generated point, in generation order.
    std::vector<std::shared_ptr<Primitive>> primitives;
    primitives.reserve(N);
    for (int index = 0; index != N; ++index) {
        primitives.emplace_back(screen->castPointToPrimitive(index));
    }

    BVH test(primitives);

    const Clock::time_point finish = Clock::now();
    const auto elapsedMs =
        std::chrono::duration_cast<std::chrono::milliseconds>(finish - begin).count();
    std::cout << "Time spent: " << elapsedMs << "ms\n";

    // Keep the console window open until a key is pressed.
    getchar();
    delete screen;
}
It would probably be wise to first clean up your GitHub repository. This means updating the code to a recent C++ standard; it seems that you can use C++17, so use it. Also, please look at some of the names. For example, 'nodes' is used as a member variable as well as a parameter name, which is confusing. Please also initialize the relevant (ideally all) member variables.
Now, it seems that the code in buildSAH overwrites memory: it looks like it can write past the end of the buckets array.

Vector.push_back(...) read access violation

I don't know why, but my program triggers a breakpoint on one line during the first iteration of two nested loops. Here is the line:
pointerHolder->linkedVertices.push_back(&sphereApproximation.vertices.back());
Here is the section within which this resides (the line is near the bottom):
// Seed shape: six vertices at distance r from the origin, one on each half-axis
// (+y, +x, +z, -y, -x, -z in that order).
static const vertice holder[6] = { vertice(0,r,0,0), vertice(r,0,0,0), vertice(0,0,r,0), vertice(0,-r,0,0), vertice(-r,0,0,0), vertice(0,0,-r,0) };
std::vector<vertice> vertices (holder, holder + (sizeof(holder) / sizeof(vertice)));
shape sphereApproximation = shape(0, vertices);
// Link every seed vertex i to the other seed vertices, walking the indices
// i+1 .. i+5 cyclically. The step t == 2 (index i+3 mod 6) is skipped; with the
// ordering above that is the vertex on the opposite half-axis.
// The stored links are raw pointers into sphereApproximation.vertices.
int count;
for (int i = 0; i < 6; i++) {
count = i;
for (int t = 0; t < 5; t++) {
if (count == 5) {
count = 0;
}
else {
count++;
}
if (t != 2) {
sphereApproximation.vertices[i].linkedVertices.push_back(&sphereApproximation.vertices[count]);
}
}
}
// Flag table with n*n - n entries (n = current vertex count), all false
// because of the trailing "()".
// NOTE(review): nothing in this fragment ever sets an entry to true or
// delete[]s the array; pow() also computes the size in floating point.
bool * newConnection = new bool[pow(sphereApproximation.vertices.size(), 2) - sphereApproximation.vertices.size()]();
vertice * pointerHolder;
// Split each link (i, t): insert a new vertex between the two endpoints and
// rewire both endpoints to it.
// NOTE(review): vertices.size() grows inside this loop, so the index computed
// for newConnection below can run past the n*n - n entries allocated above.
for (int i = 0; i < sphereApproximation.vertices.size(); i++) {
for (int t = 0; t < sphereApproximation.vertices[i].linkedVertices.size(); t++) {
if (!newConnection[(i * (sphereApproximation.vertices.size() - 1)) + t]) {
pointerHolder = sphereApproximation.vertices[i].linkedVertices[t];
// NOTE(review): push_back may reallocate the vertices vector. When it does,
// pointerHolder and every pointer stored in any linkedVertices list (all of
// which point into the old buffer) dangle, so the dereferences of
// pointerHolder below are undefined behaviour — this matches the reported
// access violation. Reserving enough capacity up front, or storing indices
// instead of pointers, avoids the invalidation.
sphereApproximation.vertices.push_back(newVertice(&sphereApproximation.vertices[i], pointerHolder, accuracyIterator + 1));
// Remove the back-link from the far endpoint to vertex i.
for (int q = 0; q < pointerHolder->linkedVertices.size(); q++) {
if (pointerHolder->linkedVertices[q] == &sphereApproximation.vertices[i]) {
pointerHolder->linkedVertices.erase(pointerHolder->linkedVertices.begin() + q);
break;
}
}
// Replace the direct link i -> far endpoint with links through the new vertex.
// NOTE(review): erasing element t and then letting the loop do t++ skips the
// element that shifted into position t.
sphereApproximation.vertices[i].linkedVertices.erase(sphereApproximation.vertices[i].linkedVertices.begin() + t);
sphereApproximation.vertices[i].linkedVertices.push_back(&sphereApproximation.vertices.back());
std::cout << "gets here" << std::endl;
pointerHolder->linkedVertices.push_back(&sphereApproximation.vertices.back());
std::cout << "does not get here" << std::endl;
// The new vertex links back to both endpoints of the edge it split.
sphereApproximation.vertices.back().linkedVertices.push_back(&sphereApproximation.vertices[i]);
sphereApproximation.vertices.back().linkedVertices.push_back(pointerHolder);
}
}
}
I know the declaration of the newVertice(...) subroutine is missing, but I thought it was unnecessary here: all that needs to be known is that its return type is vertice, and I have tested that it does return a vertice. Here are the declarations of the structs I'm using:
// A point in 3-D space plus non-owning pointers to the vertices it is
// connected to. `accuracy` is stored as given by the caller.
struct vertice {
    int accuracy;
    double x, y, z;
    std::vector<vertice*> linkedVertices;  // non-owning links to neighbours

    // Builds a vertex with an explicit neighbour list. `accuracy` is set to 0
    // here; it used to be left uninitialized by this constructor, so reading
    // it afterwards was undefined behaviour.
    vertice(double x, double y, double z, std::vector<vertice*> linkedVertices)
        : accuracy(0), x(x), y(y), z(z) {
        // The parameter is already a private copy; swap it in rather than
        // copying the list a second time.
        this->linkedVertices.swap(linkedVertices);
    }

    // Builds an unconnected vertex with the given accuracy value.
    vertice(double x, double y, double z, int accuracy)
        : accuracy(accuracy), x(x), y(y), z(z) {
    }
};
// A shape is a scalar `center` value together with its own copy of a vertex list.
struct shape {
    double center;
    std::vector<vertice> vertices;

    // Stores `center` and a copy of `vertices`.
    shape(double center, std::vector<vertice> vertices)
        : center(center),
          vertices(vertices) {
    }
};
If I've failed to provide anything please drop a comment and I shall amend my question.

Eigen C++ Assertion Failed

I have an Eigen MatrixXd object, called v, and I am facing a problem when trying to access this matrix's content. When I only print the content to the console (as in the code), it works just fine. When I try to use the content, this error shows up:
Assertion failed: (row >= 0 && row < rows() && col >= 0 && col < cols()), function operator(), file /usr/local/Cellar/eigen/3.2.4/include/eigen3/Eigen/src/Core/DenseCoeffsBase.h, line 337.
ChosenPoint ** points = new ChosenPoint*[width];
for (int i = 0; i < width; i++)
{
points[i] = new ChosenPoint[height];
for (int j = 0; j < height; j++)
{
points[i][j].setPoint(i, j, false);
points[i][j].setNumberOfFrames(numberOfFrames);
}
}
Matrix<double, 2, 1> v = (aT * a).inverse() * aT * b;
if (v.rows() == 2 && v.cols() == 1)
{
points[x][y].setFlow(v(0,0), v(1,0), frame);
}
And my ChosenPoint class:
// Plain 2-D sample: coordinates plus a validity flag.
typedef struct point
{
    double x;
    double y;
    bool isValid;
} point;

// A tracked image point together with a per-frame array of flow vectors.
//
// The object owns the `flow` array: it is allocated by setNumberOfFrames(),
// deep-copied on copy/assignment and released in the destructor. Previously the
// pointer started out uninitialized, was never freed, and was leaked again on
// every setNumberOfFrames() call.
//
// `position` arguments are not range-checked; they must lie in
// [0, numberOfFrames) of the most recent setNumberOfFrames() call.
class ChosenPoint
{
public:
    // Starts with a zeroed, invalid point and no flow storage.
    ChosenPoint() : pt(), flow(nullptr), frameCount(0)
    {
    }

    // Deep copy: the new object gets its own flow array.
    ChosenPoint(const ChosenPoint &other)
        : pt(other.pt),
          flow(cloneFlow(other.flow, other.frameCount)),
          frameCount(other.frameCount)
    {
    }

    // Deep-copy assignment. The new array is built before the old one is
    // released, so a failed allocation leaves this object untouched.
    ChosenPoint &operator=(const ChosenPoint &other)
    {
        if (this != &other)
        {
            point *copy = cloneFlow(other.flow, other.frameCount);
            delete[] this->flow;
            this->flow = copy;
            this->frameCount = other.frameCount;
            this->pt = other.pt;
        }
        return *this;
    }

    ~ChosenPoint()
    {
        delete[] this->flow;
    }

    // (Re)allocates the flow array with numberOfFrames entries, all set to
    // (0.0, 0.0, invalid). Any previous array is released. Negative counts are
    // treated as zero.
    void setNumberOfFrames(int numberOfFrames)
    {
        const int count = numberOfFrames > 0 ? numberOfFrames : 0;
        // "()" value-initializes every entry, so isValid is false rather than
        // indeterminate.
        point *fresh = new point[count]();
        delete[] this->flow;
        this->flow = fresh;
        this->frameCount = count;
    }

    // Sets the tracked position (stored as doubles) and its validity flag.
    void setPoint(int x, int y, bool isValid)
    {
        this->pt.x = (double) x;
        this->pt.y = (double) y;
        this->pt.isValid = isValid;
    }

    point getPoint()
    {
        return this->pt;
    }

    // Returns the internal flow array (nullptr before setNumberOfFrames()).
    // The pointer stays owned by this object and is invalidated by the next
    // setNumberOfFrames() call, by assignment, and by destruction.
    point* getFlow()
    {
        return this->flow;
    }

    // Stores the flow vector for frame `position`.
    void setFlow(double &xFlow, double &yFlow, int &position)
    {
        this->flow[position].x = xFlow;
        this->flow[position].y = yFlow;
    }

    // Doubles the stored flow vector of frame `position`.
    void updateFlow(int position)
    {
        this->flow[position].x = 2*this->flow[position].x;
        this->flow[position].y = 2*this->flow[position].y;
    }

    // Overwrites the flow vector of frame `position`.
    void updateFlow(double xFlow, double yFlow, int position)
    {
        this->flow[position].x = xFlow;
        this->flow[position].y = yFlow;
    }

    point pt;
    point *flow;

private:
    // Returns a freshly allocated copy of `count` entries of `source`,
    // or nullptr when there is nothing to copy.
    static point *cloneFlow(const point *source, int count)
    {
        if (source == nullptr)
        {
            return nullptr;
        }
        point *copy = new point[count]();
        for (int i = 0; i < count; i++)
        {
            copy[i] = source[i];
        }
        return copy;
    }

    int frameCount;  // number of entries in `flow`
};
My fault. The problem was with one of the other matrices that I was using in the project, and it took me a while to figure it out. Unfortunately, Eigen doesn't seem to be very helpful when this happens:
I had two matrices (A and B). The matrix with the problem was A (somehow, some data was not loaded into it). But when I multiplied A and B, it generated a new matrix C with some valid-looking results, so all my sanity checks were useless. I admit I don't know a lot about Eigen.
Anyway, hope this is helpful for more people like me.

C++ time spent allocating vectors

I am trying to speed up a piece of code that is run a total of 150,000,000 times.
I have analysed it using "Very Sleepy", which has indicated that the code is spending the most time in these 3 areas, shown in the image:
The code is as follows:
double nonLocalAtPixel(int ymax, int xmax, int y, int x , vector<nodeStructure> &nodeMST, int squareDimension, Mat &inputImage) {
vector<double> nodeWeights(8,0);
vector<double> nodeIntensities(8,0);
bool allZeroWeights = true;
int numberEitherside = (squareDimension - 1) / 2;
int index = 0;
for (int j = y - numberEitherside; j < y + numberEitherside + 1; j++) {
for (int i = x - numberEitherside; i < x + numberEitherside + 1; i++) {
// out of range or the centre pixel
if (j<0 || i<0 || j>ymax || i>xmax || (j == y && i == x)) {
index++;
continue;
}
else {
int centreNodeIndex = y*(xmax+1) + x;
int thisNodeIndex = j*(xmax+1) + i;
// add to intensity list
Scalar pixelIntensityScalar = inputImage.at<uchar>(j, i);
nodeIntensities[index] = ((double)*pixelIntensityScalar.val);
// find weight from p to q
float weight = findWeight(nodeMST, thisNodeIndex, centreNodeIndex);
if (weight!=0 && allZeroWeights) {
allZeroWeights = false;
}
nodeWeights[index] = (weight);
index++;
}
}
}
// find min b
int minb = -1;
int bCost = -1;
if (allZeroWeights) {
return 0;
}
else {
// iteratate all b values
for (int i = 0; i < nodeWeights.size(); i++) {
if (nodeWeights[i]==0) {
continue;
}
double thisbCost = nonLocalWithb(nodeIntensities[i], nodeIntensities, nodeWeights);
if (bCost<0 || thisbCost<bCost) {
bCost = thisbCost;
minb = nodeIntensities[i];
}
}
}
return minb;
}
Firstly, I assume the spent time indicated by Very Sleepy means that the majority of time is spent allocating the vector and deleting the vector?
Secondly, are there any suggestions to speed this code up?
Thanks
use std::array
reuse the vectors by passing them in as arguments of the function, or by making them global variables if possible (I'm not aware of the structure of the code, so I need more information)
allocate one vector of size 16 instead of two vectors of size 8; this will make your memory less fragmented
use parallelism if findWeight is thread safe (you need to provide more details on that too)