BVH Tree Construction - Compiling gives Random mistakes - c++

Much thanks for the help in additionally.
I'm trying to build a BVH Tree with Surface Area Heuristic, but everytime I compile my code it gives me random errors like:
"Access violation reading location"
"Run-Time Check Failure #2 - Stack around the variable 'x' was
corrupted."
"Stack overflow "
The errors happen in the BVH::buildSAH() function.
And I have tried to find a solution for the whole day, meaningless. Could it be something from the std::partition function or from sending variables with pointers to a recursion?
I'm reading from the book "Physically Based Rendering: From Theory to Implementation
By Matt Pharr, Greg Humphreys"
It works for 2 primitives in the area, but thats trivial...
If you would like to clone: https://github.com/vkaytsanov/MortonCode-BVH-KD
My BVH.hpp:
#include <vector>
#include <cassert>
#include <algorithm>
#include "memory.hpp"
#include "Screen.hpp"
#include "Point3D.hpp"
#include "BoundBox.hpp"
#pragma once
enum Axis{
X, Y, Z
};
struct MortonPrimitive{
int primitiveIndex;
uint32_t mortonCode;
};
struct BVHPrimitiveInfo {
BVHPrimitiveInfo() {}
BVHPrimitiveInfo(int primitiveNumber, const BoundBox& box) : primitiveNumber(primitiveNumber), box(box),
centroid(Point3D(box.pMin.x* 0.5f + box.pMax.x * 0.5f, box.pMin.y* 0.5f + box.pMax.y * 0.5f, box.pMin.z* 0.5f + box.pMax.z * 0.5f)) {}
int primitiveNumber;
BoundBox box;
Point3D centroid;
};
struct BVHNode {
void InitLeaf(int first, int n, const BoundBox& b) {
firstPrimOffset = first;
nPrimitives = n;
box = b;
children[0] = children[1] = nullptr;
}
void InitInterior(int axis, BVHNode* c0, BVHNode* c1) {
assert(c0 != NULL || c1 != NULL);
children[0] = c0;
children[1] = c1;
this->box = Union(c0->box, c1->box);
splitAxis = axis;
nPrimitives = 0;
}
BoundBox box;
BVHNode* children[2];
int splitAxis, firstPrimOffset, nPrimitives;
};
struct LinearBVHNode {
BoundBox bounds;
union {
int primitivesOffset; // leaf
int secondChildOffset; // interior
};
uint16_t nPrimitives; // 0 -> interior node
uint8_t axis; // interior node: xyz
uint8_t pad[1]; // ensure 32 byte total size
};
struct BVHLittleTree {
int startIndex;
int numPrimitives;
BVHNode* nodes;
};
struct BVH {
BVH(std::vector<std::shared_ptr<Primitive>> p) : primitives(std::move(p)) {
std::vector<BVHPrimitiveInfo> BVHPrimitives;
BVHPrimitives.reserve(primitives.size());
for (int i = 0; i < primitives.size(); i++) {
BVHPrimitives.push_back({ i, primitives[i]->box });
}
MemoryArena arena(1024 * 1024);
int totalNodes = 0;
std::vector<std::shared_ptr<Primitive>> orderedPrimitives;
orderedPrimitives.reserve(primitives.size());
BVHNode* root;
root = HLBVHBuild(arena, BVHPrimitives, &totalNodes, orderedPrimitives);
primitives.swap(orderedPrimitives);
BVHPrimitives.resize(0);
printf("BVH created with %d nodes for %d "
"primitives (%.4f MB), arena allocated %.2f MB\n",
(int)totalNodes, (int)primitives.size(),
float(totalNodes * sizeof(LinearBVHNode)) /
(1024.f * 1024.f),
float(arena.TotalAllocated()) /
(1024.f * 1024.f));
assert(root != NULL);
nodes = AllocAligned<LinearBVHNode>(totalNodes);
int offset = 0;
flattenBVHTree(root, &offset);
}
~BVH() { FreeAligned(nodes); }
BVHNode* build(std::vector<MortonPrimitive>&, std::vector<Primitive>&);
BVHNode* HLBVHBuild(MemoryArena& arena, const std::vector<BVHPrimitiveInfo>& BVHPrimitives, int* totalNodes, std::vector<std::shared_ptr<Primitive>>& orderedPrims);
BVHNode* emit(BVHNode*& nodes, const std::vector<BVHPrimitiveInfo>& BVHPrimitives, MortonPrimitive* mortonPrimitives, std::vector<std::shared_ptr<Primitive>>&, int, int*, int*, int);
BVHNode* buildSAH(MemoryArena& arena, std::vector<BVHNode*>& treeRoots, int start, int end, int* total) const;
int flattenBVHTree(BVHNode*, int*);
std::vector<std::shared_ptr<Primitive>> primitives;
LinearBVHNode* nodes = nullptr;
int maxPrimsInNode = 1;
};
inline uint32_t LeftShift3(uint32_t x) {
if (x == (1 << 10)) --x;
x = (x | (x << 16)) & 0b00000011000000000000000011111111;
x = (x | (x << 8)) & 0b00000011000000001111000000001111;
x = (x | (x << 4)) & 0b00000011000011000011000011000011;
x = (x | (x << 2)) & 0b00001001001001001001001001001001;
return x;
}
uint32_t EncodeMorton3(const Point3D& p) {
return (LeftShift3(p.z) << 2) |
(LeftShift3(p.y) << 1) |
(LeftShift3(p.x) << 0);
}
short bitValue(uint32_t& number, uint32_t& mask) {
return number & mask ? 1 : 0;
}
static void radixSort(std::vector<MortonPrimitive>* v)
{
std::vector<MortonPrimitive> tempVector(v->size());
const int bitsPerPass = 6;
const int nBits = 30;
static_assert((nBits % bitsPerPass) == 0,
"Radix sort bitsPerPass must evenly divide nBits");
const int nPasses = nBits / bitsPerPass;
for (int pass = 0; pass < nPasses; ++pass) {
// Perform one pass of radix sort, sorting _bitsPerPass_ bits
int lowBit = pass * bitsPerPass;
// Set in and out vector pointers for radix sort pass
std::vector<MortonPrimitive>& in = (pass & 1) ? tempVector : *v;
std::vector<MortonPrimitive>& out = (pass & 1) ? *v : tempVector;
// Count number of zero bits in array for current radix sort bit
const int nBuckets = 1 << bitsPerPass;
int bucketCount[nBuckets] = { 0 };
const int bitMask = (1 << bitsPerPass) - 1;
for (const MortonPrimitive& mp : in) {
int bucket = (mp.mortonCode >> lowBit) & bitMask;
++bucketCount[bucket];
}
// Compute starting index in output array for each bucket
int outIndex[nBuckets];
outIndex[0] = 0;
for (int i = 1; i < nBuckets; ++i)
outIndex[i] = outIndex[i - 1] + bucketCount[i - 1];
// Store sorted values in output array
for (const MortonPrimitive& mp : in) {
int bucket = (mp.mortonCode >> lowBit) & bitMask;
out[outIndex[bucket]++] = mp;
}
}
// Copy final result from _tempVector_, if needed
if (nPasses & 1) std::swap(*v, tempVector);
}
//BVHNode* BVH::build(std::vector<MortonPrimitive>& mortonPrimitives, std::vector<Primitive>& prims) {
//
//
//}
struct BucketInfo {
int count = 0;
BoundBox bounds;
};
BVHNode* BVH::HLBVHBuild(MemoryArena& arena, const std::vector<BVHPrimitiveInfo>& BVHPrimitives, int* totalNodes, std::vector<std::shared_ptr<Primitive>>& orderedPrims) {
BoundBox box;
for (const BVHPrimitiveInfo& pi : BVHPrimitives) {
box = box.Union(box, pi.centroid); // maybe it should be UNION #TODO
}
std::vector<MortonPrimitive> mortonPrims(BVHPrimitives.size());
for (int i = 0; i < BVHPrimitives.size(); i++) {
const int mortonBits = 10;
const int mortonScale = 1 << mortonBits;
mortonPrims[i].primitiveIndex = BVHPrimitives[i].primitiveNumber;
Point3D p = box.offset(BVHPrimitives[i].centroid);
p.x = p.x * mortonScale;
p.y = p.y * mortonScale;
p.z = p.z * mortonScale;
mortonPrims[i].mortonCode = EncodeMorton3(p);
}
radixSort(&mortonPrims);
//for (MortonPrimitive mp : mortonPrims) {
// std::cout << mp.primitiveIndex << " " << mp.mortonCode << std::endl;
//}
std::vector<BVHLittleTree> treesToBuild;
uint32_t mask = 0b00111111111111000000000000000000; // first 12 bits describe the position of the primitive
for (int start = 0, end = 1; end <= (int)mortonPrims.size(); end++) {
if (end == mortonPrims.size() || ((mortonPrims[start].mortonCode & mask) != (mortonPrims[end].mortonCode & mask))) {
int n = end - start;
int maxNodes = 2 * n;
BVHNode* nodes = arena.Alloc<BVHNode>(maxNodes, false);
treesToBuild.push_back({ start, n, nodes });
start = end;
}
}
int orderedPrimsOffset = 0;
orderedPrims.resize(primitives.size());
int nodesCreated = 0;
int firstBitIndex = 29 - 12;
for (int i = 0; i < treesToBuild.size(); i++) {
treesToBuild[i].nodes = BVH::emit(treesToBuild[i].nodes, BVHPrimitives, &mortonPrims[treesToBuild[i].startIndex], orderedPrims, treesToBuild[i].numPrimitives, &nodesCreated, &orderedPrimsOffset, firstBitIndex);
*totalNodes += nodesCreated;
}
totalNodes += nodesCreated;
std::vector<BVHNode*> finishedTrees;
finishedTrees.reserve(treesToBuild.size());
for (BVHLittleTree& tr : treesToBuild) {
finishedTrees.emplace_back(tr.nodes);
}
return buildSAH(arena, finishedTrees, 0, finishedTrees.size(), totalNodes);
}
BVHNode* BVH::emit(BVHNode*& nodes, const std::vector<BVHPrimitiveInfo>& BVHPrimitive, MortonPrimitive* mortonPrimitives, std::vector<std::shared_ptr<Primitive>>& orderedPrimitives, int primitivesCount, int* totalNodes, int* orderedPrimsOffset, int bitIndex) {
if (bitIndex == -1 || primitivesCount < maxPrimsInNode) {
(*totalNodes)++;
BVHNode* tmp = nodes++;
BoundBox box;
int firstPrimOffset = *orderedPrimsOffset;
for (int i = 0; i < primitivesCount; i++) {
int index = mortonPrimitives[i].primitiveIndex;
orderedPrimitives[firstPrimOffset + i] = primitives[index];
box = box.Union(box, BVHPrimitive[index].box);
}
tmp->InitLeaf(0, primitivesCount, box);
return tmp;
}
else {
int mask = 1 << bitIndex;
if ((mortonPrimitives[0].mortonCode & mask) == (mortonPrimitives[primitivesCount - 1].mortonCode & mask)){ // Next tree if nothing to split for this bit
return emit(nodes, BVHPrimitive, mortonPrimitives, orderedPrimitives, primitivesCount, totalNodes, orderedPrimsOffset, bitIndex - 1);
}
int start = 0;
int end = primitivesCount - 1;
while (start + 1 != end) {
int mid = (end - start) / 2 + start; // (start-end)/2
if ((mortonPrimitives[start].mortonCode & mask) == (mortonPrimitives[mid].mortonCode & mask)) {
start = mid;
}
else {
end = mid;
}
}
int split = end;
(*totalNodes)++;
BVHNode* tmp = nodes++;
BVHNode* lbvh[2];
lbvh[0] = emit(nodes, BVHPrimitive, mortonPrimitives, orderedPrimitives, split, totalNodes, orderedPrimsOffset, bitIndex-1);
lbvh[1] = emit(nodes, BVHPrimitive, &mortonPrimitives[split], orderedPrimitives, primitivesCount - split, totalNodes, orderedPrimsOffset, bitIndex - 1);
int axis = bitIndex % 3;
tmp->InitInterior(axis, lbvh[0], lbvh[1]);
return tmp;
}
}
BVHNode* BVH::buildSAH(MemoryArena& arena, std::vector<BVHNode*>& treeRoots, int start, int end, int* total) const {
int nodesCount = end - start;
if (nodesCount == 1) {
return treeRoots[start];
}
assert(nodesCount > 1);
(*total)++;
BVHNode* node = arena.Alloc<BVHNode>();
BoundBox box;
for (int i = start; i < end; i++) {
box = Union(box, treeRoots[i]->box);
}
BoundBox centroidBox;
for (int i = start; i < end; i++) {
Point3D centroid = Point3D((treeRoots[i]->box.pMin.x + treeRoots[i]->box.pMax.x) * 0.5f, (treeRoots[i]->box.pMin.y + treeRoots[i]->box.pMax.y) * 0.5f, (treeRoots[i]->box.pMin.z + treeRoots[i]->box.pMax.z) * 0.5f);
centroidBox = Union(centroidBox, centroid);
}
const int dimension = centroidBox.MaximumExtent();
const int nBuckets = 12;
struct Buckets {
int count = 0;
BoundBox box;
};
Buckets buckets[nBuckets];
for (int i = start; i < end; i++) {
float centroid = (treeRoots[i]->box.pMin[dimension] * 0.5f + treeRoots[i]->box.pMax[dimension] * 0.5f) ;
int b = nBuckets * ((centroid - centroidBox.pMin[dimension]) / (centroidBox.pMax[dimension] - centroidBox.pMin[dimension]));
if (b == nBuckets) b = nBuckets - 1;
//assert(b < nBuckets);
buckets[b].count++;
buckets[b].box = Union(buckets[b].box, treeRoots[i]->box);
}
float cost[nBuckets - 1];
for (int i = 0; i < nBuckets - 1; i++) {
BoundBox b0, b1;
int count0 = 0, count1 = 0;
for (int j = 0; j <= i; j++) {
b0 = Union(b0, buckets[j].box);
count0 += buckets[j].count;
}
for (int j = i+1; j < nBuckets; j++) {
b1 = Union(b1, buckets[j].box);
count1 += buckets[j].count;
}
cost[i] = (.125f + (count0 * b0.surfaceArea() + count1 * b1.surfaceArea())) / box.surfaceArea();
}
double minCost = cost[0];
int minCostSplitBucket = 0;
for (int i = 1; i < nBuckets - 1; ++i) {
if (cost[i] < minCost) {
minCost = cost[i];
minCostSplitBucket = i;
}
}
BVHNode** pmid = std::partition(&treeRoots[start], &treeRoots[end - 1] + 1, [=](const BVHNode* node) {
double centroid = (node->box.pMin[dimension]*0.5f + node->box.pMax[dimension] * 0.5f) ;
int b = nBuckets * ((centroid - centroidBox.pMin[dimension]) / (centroidBox.pMax[dimension] - centroidBox.pMin[dimension]));
if (b == nBuckets) b = nBuckets - 1;
return b <= minCostSplitBucket;
});
assert(pmid != NULL);
//std::cout << pmid << " " << &treeRoots[0];
int mid = pmid - &treeRoots[0];
//std::cout << start << " " << mid << std::endl;
//std::cout << mid << " " << end << std::endl;
std::cout << dimension << std::endl;
//assert(dimension < 3);
node->InitInterior(dimension, this->buildSAH(arena, treeRoots, start, mid, total), this->buildSAH(arena, treeRoots, mid, end, total));
return node;
}
int BVH::flattenBVHTree(BVHNode* node, int* offset) {
LinearBVHNode* linearNode = &nodes[*offset];
linearNode->bounds = node->box;
int myOffset = (*offset)++;
if (node->nPrimitives > 0) {
linearNode->primitivesOffset = node->firstPrimOffset;
linearNode->nPrimitives = node->nPrimitives;
}
else {
// Create interior flattened BVH node
linearNode->axis = node->splitAxis;
linearNode->nPrimitives = 0;
flattenBVHTree(node->children[0], offset);
linearNode->secondChildOffset = flattenBVHTree(node->children[1], offset);
}
return myOffset;
}
My Point3D.hpp
#include <cstdint>
#pragma once
struct Point3D {
float x;
float y;
float z;
Point3D(uint32_t, uint32_t, uint32_t);
Point3D();
int operator[](int);
int operator[](int) const;
Point3D operator+(int);
Point3D operator-(int);
Point3D operator-(Point3D&);
};
Point3D::Point3D() {
x = 0;
y = 0;
z = 0;
}
Point3D::Point3D(uint32_t x, uint32_t y, uint32_t z) {
this->x = x;
this->y = y;
this->z = z;
}
bool operator<(Point3D a, Point3D b) {
uint32_t xSquare = a.x * a.x;
uint32_t ySquare = a.y * a.y;
uint32_t zSquare = a.z * a.z;
uint32_t x2Square = b.x * b.x;
uint32_t y2Square = b.y * b.y;
uint32_t z2Square = b.z * b.z;
int64_t sum = std::sqrt(xSquare + ySquare + z2Square) - std::sqrt(x2Square + y2Square + z2Square);
return sum < 0 ||
sum == 0 && xSquare < x2Square ||
sum == 0 && xSquare == x2Square && ySquare < y2Square ||
sum == 0 && xSquare == x2Square && ySquare == y2Square && zSquare < z2Square;
}
bool operator>(Point3D a, Point3D b) {
uint32_t xSquare = a.x * a.x;
uint32_t ySquare = a.y * a.y;
uint32_t zSquare = a.z * a.z;
uint32_t x2Square = b.x * b.x;
uint32_t y2Square = b.y * b.y;
uint32_t z2Square = b.z * b.z;
int32_t sum = std::sqrt(xSquare + ySquare + z2Square) - std::sqrt(x2Square + y2Square + z2Square);
return sum > 0 ||
sum == 0 && xSquare > x2Square ||
sum == 0 && xSquare == x2Square && ySquare > y2Square ||
sum == 0 && xSquare == x2Square && ySquare == y2Square && zSquare > z2Square;
}
int Point3D::operator[](int i) {
if (i == 0) return x;
if (i == 1) return y;
return z;
}
Point3D Point3D::operator+(int i) {
this->x += i;
this->y += i;
this->z += i;
return *this;
}
Point3D Point3D::operator-(const int i) {
this->x -= i;
this->y -= i;
this->z -= i;
return *this;
}
Point3D Point3D::operator-(Point3D& p) {
this->x -= p.x;
this->y -= p.y;
this->z -= p.z;
return *this;
}
int Point3D::operator[](const int i) const {
if (i == 0) return x;
if (i == 1) return y;
return z;
}
My BoundBox.hpp
#include "Point3D.hpp"
#include "Vector3D.hpp"
#pragma once
struct BoundBox {
Point3D pMin;
Point3D pMax;
BoundBox(Point3D);
BoundBox(Point3D, Point3D);
BoundBox();
void setBounds(BoundBox);
void Union(BoundBox);
BoundBox Union(BoundBox&, Point3D&);
BoundBox Union(BoundBox, BoundBox);
BoundBox unite(BoundBox, BoundBox);
BoundBox unite(BoundBox);
const Point3D offset(const Point3D&);
Point3D diagonal();
const int MaximumExtent();
float surfaceArea();
};
BoundBox::BoundBox() {
float minNum = 0;
pMin = Point3D(800, 600, 300);
pMax = Point3D(minNum, minNum, minNum);
}
BoundBox::BoundBox(Point3D p){
pMin = p;
pMax = p;
}
BoundBox::BoundBox(Point3D p1, Point3D p2) {
pMin = Point3D(std::min(p1.x, p2.x), std::min(p1.y, p2.y), std::min(p1.z, p2.z));
pMax = Point3D(std::max(p1.x, p2.x), std::max(p1.y, p2.y), std::max(p1.z, p2.z));
}
BoundBox BoundBox::Union(BoundBox& box, Point3D& p) {
BoundBox newBox;
newBox.pMin = Point3D(std::min(box.pMin.x, p.x), std::min(box.pMin.y, p.y), std::min(box.pMin.z, p.z));
newBox.pMax = Point3D(std::max(box.pMax.x, p.x), std::max(box.pMax.y, p.y), std::max(box.pMax.z, p.z));
return newBox;
}
BoundBox BoundBox::Union(BoundBox box1, BoundBox box2) {
BoundBox newBox;
newBox.pMin = std::min(box1.pMin, box2.pMin);
newBox.pMax = std::max(box1.pMax, box2.pMax);
return newBox;
}
BoundBox Union(BoundBox box1, BoundBox box2) {
BoundBox newBox;
newBox.pMin = std::min(box1.pMin, box2.pMin);
newBox.pMax = std::max(box1.pMax, box2.pMax);
return newBox;
}
BoundBox BoundBox::unite(BoundBox b1, BoundBox b2) {
bool x = (b1.pMax.x >= b2.pMin.x) && (b1.pMin.x <= b2.pMax.x);
bool y = (b1.pMax.y >= b2.pMin.y) && (b1.pMin.y <= b2.pMax.y);
bool z = (b1.pMax.z >= b2.pMin.z) && (b1.pMin.z <= b2.pMax.z);
if (x && y && z) {
return Union(b1, b2);
}
}
BoundBox BoundBox::unite(BoundBox b2) {
bool x = (this->pMax.x >= b2.pMin.x) && (this->pMin.x <= b2.pMax.x);
bool y = (this->pMax.y >= b2.pMin.y) && (this->pMin.y <= b2.pMax.y);
bool z = (this->pMax.z >= b2.pMin.z) && (this->pMin.z <= b2.pMax.z);
if (x && y && z) {
return Union(*this, b2);
}
else return *this;
}
const int BoundBox::MaximumExtent() {
Point3D d = Point3D(this->pMax.x - this->pMin.x, this->pMax.y - this->pMin.y, this->pMax.z - this->pMin.z); // diagonal
if (d.x > d.y && d.x > d.z) {
return 0;
}
else if (d.y > d.z) {
return 1;
}
else {
return 2;
}
}
float BoundBox::surfaceArea() {
Point3D d = Point3D(this->pMax.x - this->pMin.x, this->pMax.y - this->pMin.y, this->pMax.z - this->pMin.z); // diagonal
return 2 * (d.x * d.y + d.x * d.z + d.y * d.z);
}
const Point3D BoundBox::offset(const Point3D& p) {
Point3D o = Point3D(p.x - pMin.x, p.y - pMin.y, p.z - pMin.z);
if (pMax.x > pMin.x) o.x /= pMax.x - pMin.x;
if (pMax.y > pMin.y) o.y /= pMax.y - pMin.y;
if (pMax.z > pMin.z) o.z /= pMax.z - pMin.z;
return o;
}
My memory.hpp
#include <list>
#include <cstddef>
#include <algorithm>
#include <malloc.h>
#include <stdlib.h>
#pragma once
#define ARENA_ALLOC(arena, Type) new ((arena).Alloc(sizeof(Type))) Type
void* AllocAligned(size_t size);
template <typename T>
T* AllocAligned(size_t count) {
return (T*)AllocAligned(count * sizeof(T));
}
void FreeAligned(void*);
class
#ifdef PBRT_HAVE_ALIGNAS
alignas(PBRT_L1_CACHE_LINE_SIZE)
#endif // PBRT_HAVE_ALIGNAS
MemoryArena {
public:
// MemoryArena Public Methods
MemoryArena(size_t blockSize = 262144) : blockSize(blockSize) {}
~MemoryArena() {
FreeAligned(currentBlock);
for (auto& block : usedBlocks) FreeAligned(block.second);
for (auto& block : availableBlocks) FreeAligned(block.second);
}
void* Alloc(size_t nBytes) {
// Round up _nBytes_ to minimum machine alignment
#if __GNUC__ == 4 && __GNUC_MINOR__ < 9
// gcc bug: max_align_t wasn't in std:: until 4.9.0
const int align = alignof(::max_align_t);
#elif !defined(PBRT_HAVE_ALIGNOF)
const int align = 16;
#else
const int align = alignof(std::max_align_t);
#endif
#ifdef PBRT_HAVE_CONSTEXPR
static_assert(IsPowerOf2(align), "Minimum alignment not a power of two");
#endif
nBytes = (nBytes + align - 1) & ~(align - 1);
if (currentBlockPos + nBytes > currentAllocSize) {
// Add current block to _usedBlocks_ list
if (currentBlock) {
usedBlocks.push_back(
std::make_pair(currentAllocSize, currentBlock));
currentBlock = nullptr;
currentAllocSize = 0;
}
// Get new block of memory for _MemoryArena_
// Try to get memory block from _availableBlocks_
for (auto iter = availableBlocks.begin();
iter != availableBlocks.end(); ++iter) {
if (iter->first >= nBytes) {
currentAllocSize = iter->first;
currentBlock = iter->second;
availableBlocks.erase(iter);
break;
}
}
if (!currentBlock) {
currentAllocSize = std::max(nBytes, blockSize);
currentBlock = AllocAligned<uint8_t>(currentAllocSize);
}
currentBlockPos = 0;
}
void* ret = currentBlock + currentBlockPos;
currentBlockPos += nBytes;
return ret;
}
template <typename T>
T* Alloc(size_t n = 1, bool runConstructor = true) {
T* ret = (T*)Alloc(n * sizeof(T));
if (runConstructor)
for (size_t i = 0; i < n; ++i) new (&ret[i]) T();
return ret;
}
void Reset() {
currentBlockPos = 0;
availableBlocks.splice(availableBlocks.begin(), usedBlocks);
}
size_t TotalAllocated() const {
size_t total = currentAllocSize;
for (const auto& alloc : usedBlocks) total += alloc.first;
for (const auto& alloc : availableBlocks) total += alloc.first;
return total;
}
private:
MemoryArena(const MemoryArena&) = delete;
MemoryArena & operator=(const MemoryArena&) = delete;
// MemoryArena Private Data
const size_t blockSize;
size_t currentBlockPos = 0, currentAllocSize = 0;
uint8_t * currentBlock = nullptr;
std::list<std::pair<size_t, uint8_t*>> usedBlocks, availableBlocks;
};
template <typename T, int logBlockSize>
class BlockedArray {
public:
// BlockedArray Public Methods
BlockedArray(int uRes, int vRes, const T* d = nullptr)
: uRes(uRes), vRes(vRes), uBlocks(RoundUp(uRes) >> logBlockSize) {
int nAlloc = RoundUp(uRes) * RoundUp(vRes);
data = AllocAligned<T>(nAlloc);
for (int i = 0; i < nAlloc; ++i) new (&data[i]) T();
if (d)
for (int v = 0; v < vRes; ++v)
for (int u = 0; u < uRes; ++u) (*this)(u, v) = d[v * uRes + u];
}
const int BlockSize() const { return 1 << logBlockSize; }
int RoundUp(int x) const {
return (x + BlockSize() - 1) & ~(BlockSize() - 1);
}
int uSize() const { return uRes; }
int vSize() const { return vRes; }
~BlockedArray() {
for (int i = 0; i < uRes * vRes; ++i) data[i].~T();
FreeAligned(data);
}
int Block(int a) const { return a >> logBlockSize; }
int Offset(int a) const { return (a & (BlockSize() - 1)); }
T& operator()(int u, int v) {
int bu = Block(u), bv = Block(v);
int ou = Offset(u), ov = Offset(v);
int offset = BlockSize() * BlockSize() * (uBlocks * bv + bu);
offset += BlockSize() * ov + ou;
return data[offset];
}
const T & operator()(int u, int v) const {
int bu = Block(u), bv = Block(v);
int ou = Offset(u), ov = Offset(v);
int offset = BlockSize() * BlockSize() * (uBlocks * bv + bu);
offset += BlockSize() * ov + ou;
return data[offset];
}
void GetLinearArray(T * a) const {
for (int v = 0; v < vRes; ++v)
for (int u = 0; u < uRes; ++u) * a++ = (*this)(u, v);
}
private:
// BlockedArray Private Data
T * data;
const int uRes, vRes, uBlocks;
};
void* AllocAligned(size_t size) {
return _aligned_malloc(size, 32);
}
void FreeAligned(void* ptr) {
if (!ptr) return;
_aligned_free(ptr);
}
and My Source.cpp
#include <iostream>
#include <vector>
#include <chrono>
#include "Point3D.hpp"
#include "Screen.hpp"
#include "BVH.hpp"
#define N 150
int main(){
auto startTime = std::chrono::high_resolution_clock::now();
Screen* screen = new Screen(800, 600, 300);
screen->generatePoints(N);
//for (MortonPrimitive m : mortonPrims) {
// std::cout << m.mortonCode << std::endl;
//}
std::vector<std::shared_ptr<Primitive>> primitives;
primitives.reserve(N);
for (int i = 0; i < N; i++) {
primitives.emplace_back(screen->castPointToPrimitive(i));
}
BVH test(primitives);
auto endTime = std::chrono::high_resolution_clock::now();
std::cout << "Time spent: " << std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count() << "ms\n";
getchar();
delete screen;
}

Probably it would be wise to first cleanup your github. This mean update stuff to the recent c++ standard. It seems that you can use c++17 so use it. Also please look at some names. For example 'nodes' is used as member variable as well as parameter name, this is confusion. Please also initialize relevant (all) member variables.
Now it seems that the code in buildSAH override memory. It seems that it it can write over the end of buckets array.

Related

Purpose of custom BitSet implementation in this Leetcode answer

Last night I was working on the "Longest Palindromic Subsequence" problem on leetcode. After completing it I took a look at the fastest answer, and to my surprise it was a giant custom bitset implementation. I decided to try and reverse engineer it a bit and see if I could implement it using std::bitset, but I've run into some issues.
Here's the code:
#if __cplusplus>199711L //c++11
#include<unordered_map>
#endif
const int N=1005;
template<int S>
struct BitSet{
#define W 6
#define mask 63
#define get_size(n) ((n)<1?0:((n)+mask)>>W)
typedef unsigned long long uint; //typedef unsigned int uint;
uint a[get_size(S)];int size;
void reset(){memset(a,0,sizeof(uint)*size);}
BitSet():size(get_size(S)){reset();}
BitSet(uint x):size(get_size(S)){reset();a[0]=x;}
BitSet(const BitSet<S> &x):size(get_size(S)){*this=x;}
BitSet& set(int x,int y){
//if (y<0||y>1){printf("error!\n");return *this;}
int X=x>>W,Y=x&mask;
if (y)a[X]|=1ull<<Y;else a[X]&=~(1ull<<Y);
return *this;
}
int find(int x){int X=x>>W,Y=x&mask;return (a[X]>>Y)&1ull;}
int operator [](int x){return find(x);}
BitSet& operator =(const BitSet &y){
memcpy(a,y.a,sizeof(uint)*size);
return *this;
}
BitSet<S> operator |(const BitSet<S> &y)const{return BitSet<S>(*this)|=y;}
BitSet<S> operator &(const BitSet<S> &y)const{return BitSet<S>(*this)&=y;}
BitSet<S> operator ^(const BitSet<S> &y)const{return BitSet<S>(*this)^=y;}
BitSet<S> operator +(const BitSet<S> &y)const{return BitSet<S>(*this)+=y;}
BitSet<S> operator -(const BitSet<S> &y)const{return BitSet<S>(*this)-=y;}
BitSet<S> operator <<(int x)const{return BitSet<S>(*this)<<=x;}
BitSet<S> operator >>(int x)const{return BitSet<S>(*this)>>=x;}
BitSet<S> operator ~()const{return BitSet<S>(*this).flip();}
BitSet<S>& operator =(const char *s){
memset(a,0,sizeof(uint)*size);
for (int i=0;i<S;++i){
if (s[i]!='0'&&s[i]!='1')break;
int X=i>>W,Y=i&mask;
if (s[i]=='1')a[X]|=1ull<<Y;
}
return *this;
}
BitSet<S>& operator =(const int *s){
memset(a,0,sizeof(uint)*size);
for (int i=0;i<S;++i){
if (s[i]!=0&&s[i]!=1)break;
int X=i>>W,Y=i&mask;
if (s[i]==1)a[X]|=1ull<<Y;
}
return *this;
}
BitSet<S>& operator <<=(int x){
int shift=x>>W; int delta=x&mask,delta1=mask+1-delta;
if (!x)return *this;
if (delta==0)for (uint *p=a+size-1,*q=p-shift,*end=a+shift-1;p!=end;--p,--q)*p=*q;
else {
for (uint *p=a+size-1,*q1=p-shift,*q2=p-shift-1,*end=a+shift;p!=end;--p,--q1,--q2)*p=(*q1<<delta)|(*q2>>delta1);
a[shift]=a[0]<<delta;
}
memset(a,0,sizeof(uint)*shift); //for (uint *p=a,*end=a+shift;p!=end;++p)*p=0;
return *this;
}
BitSet<S>& operator >>=(int x){
int shift=x>>W; int delta=x&mask,delta1=mask+1-delta;
if (!x)return *this;
correction();
if (delta==0)for (uint *p=a,*q=p+shift,*end=a+size-shift;p!=end;++p,++q)*p=*q;
else {
for (uint *p=a,*q1=p+shift,*q2=p+shift+1,*end=a+size-shift-1;p!=end;++p,++q1,++q2)*p=(*q1>>delta)|(*q2<<delta1);
a[size-shift-1]=a[size-1]>>delta;
}
memset(a+size-shift,0,sizeof(uint)*shift);
return *this;
}
BitSet<S>& operator |=(const BitSet<S> &y){
uint *startA=a;const uint *startB=y.a,*endA=a+size;
while (startA!=endA){*startA|=*startB;++startA;++startB;}
//for (int i=0;i<size;++i)a[i]|=y.a[i];
return *this;
}
/*BitSet<S>& operator |=(const BitSet<S> &y){
uint *p0=a,*p1=p0+1,*p2=p0+2,*p3=p0+3;const uint *q0=y.a,*q1=q0+1,*q2=q0+2,*q3=q0+3,*pend=a+((size>>2)<<2);
while (p0!=pend){
*p0|=*q0; p0+=4; q0+=4;
*p1|=*q1; p1+=4; q1+=4;
*p2|=*q2; p2+=4; q2+=4;
*p3|=*q3; p3+=4; q3+=4;
}
for (int i=0;i<(size&3);++i)*p0++|=*q0++;
return *this;
}*/
BitSet<S>& operator &=(const BitSet<S> &y){
uint *startA=a;const uint *startB=y.a,*endA=a+size;
while (startA!=endA){*startA&=*startB;++startA;++startB;}
return *this;
}
BitSet<S>& operator ^=(const BitSet<S> &y){
uint *startA=a;const uint *startB=y.a,*endA=a+size;
while (startA!=endA){*startA^=*startB;++startA;++startB;}
return *this;
}
BitSet<S>& operator +=(const BitSet<S> &y){
uint t=0,*p=a,*end=a+size; const uint *q=y.a;
while (p!=end){
uint p1=*p; *p=p1+*q+t;
t=(*p<p1)||(p1+t<t);
++p; ++q;
}
return *this;
}
BitSet<S>& operator -=(const BitSet<S> &y){
uint t=0,*p=a,*end=a+size; const uint *q=y.a;
while (p!=end){
uint p1=*p; *p=p1-*q-t;
t=(*p>p1)||(p1+t<t);
++p; ++q;
}
return *this;
}
operator bool(){return count()>0;}
BitSet<S>& flip(){
//for (uint *start=a,*end=a+size;start!=end;*start=~*start,++start);
uint *p0=a,*p1=p0+1,*p2=p0+2,*p3=p0+3,*pend=a+((size>>2)<<2);
while (p0!=pend){
*p0=~*p0; p0+=4;
*p1=~*p1; p1+=4;
*p2=~*p2; p2+=4;
*p3=~*p3; p3+=4;
}
for (int i=0;i<(size&3);++i,++p0)*p0=~*p0;
return *this;
}
//void flip(){*this=~*this;}
void flip(int x){a[x>>W]^=1ull<<(x&mask);}
int popcount(uint x)const{
x-=(x&0xaaaaaaaaaaaaaaaaull)>>1;
x=((x&0xccccccccccccccccull)>>2)+(x&0x3333333333333333ull);
x=((x>>4)+x)&0x0f0f0f0f0f0f0f0full;
return (x*0x0101010101010101ull)>>56;
}
int count(){
int res=0;
correction();
for (int i=0;i<size;++i)res+=__builtin_popcountll(a[i]); //popcount
return res;
}
int clz(){
correction();
int res=0;
if (a[size-1])res=__builtin_clzll(a[size-1])-(mask+1-(S&mask));
else {
res+=S&mask;
for (int i=size-2;i>=0;--i)
if (a[i]){res+=__builtin_clzll(a[i]); break;}
else res+=mask+1;
}
return res;
}
int ctz(){
correction();
int res=0;
for (int i=0;i<size;++i)
if (a[i]){res+=__builtin_ctzll(a[i]); break;}
else res+=mask+1;
return min(res,S);
}
int ffs(){
int res=ctz()+1;
if (res==S+1)res=0;
return res;
}
uint to_uint(){
correction();
return a[0];
}
void print(){
for (int i=0;i<size;++i)
for (int j=0;j<=mask&&(i<<W)+j+1<=S;++j)printf("%I64d",(a[i]>>j)&1ull);
printf("\n");
}
void correction(){if (S&mask)a[size-1]&=(1ull<<(S&mask))-1;}
#undef mask
#undef W
#undef get_size
};
int a[N],b[N];
BitSet<N> row[2],X,Y;
unordered_map<int,vector<int> > S;
unordered_map<int,BitSet<N> > match;
class Solution {
public:
int longestPalindromeSubseq(string s) {
int n=s.size(),m=n;
S.clear();match.clear();row[1].reset();
for (int i=0;i<n;++i)a[i]=int(s[i]),S[a[i]].push_back(i);
for (int i=0;i<m;++i)b[i]=int(s[n-1-i]);
for (int i=0;i<m;++i)if (match.find(b[i])==match.end()){
unordered_map<int,BitSet<N> >::iterator x=match.insert(make_pair(b[i],BitSet<N>())).first;
for (vector<int>::iterator j=S[b[i]].begin();j!=S[b[i]].end();++j)x->second.set(*j,1);
}
for (int i=0,now=0;i<m;++i,now^=1)
X=(row[now^1]|match[b[i]]).set(n,1),row[now]=(X&((X-(row[now^1]<<1).set(0,1))^X)).set(n,0);
return row[(m-1)&1].count();
}
};
And here's my attempt at cleaning it up/understanding it:
Bitset.h:
#pragma once
//#if __cplusplus>199711L //c++11
#include<unordered_map>
//#endif
#include <intrin.h>
const int N = 1005; //this size doesn't even matter?? It's just enigmatically assigning a type of int, I guess??
template<int S>
struct BitSet {
#define W 6
#define mask 63
#define get_size(n) ((n)<1?0:((n)+mask)>>W)
//members
typedef unsigned long long uint; //typedef unsigned int uint; //not sure why they didn't just use size_t here...
uint a[get_size(S)]; //represents the "BitSet" I think...
int size;
//constructors
BitSet() :size(get_size(S)) { reset(); }
BitSet(uint x) :size(get_size(S)) { reset(); a[0] = x; }
BitSet(const BitSet<S>& x) :size(get_size(S)) { *this = x; }
//utility functions
BitSet& set(int x, int y) {
//if (y<0||y>1){printf("error!\n");return *this;}
int X = x >> W, Y = x & mask; //bit shift with the magic numbers??? //What is the significance of 6, 63, and 1005?
if (y)a[X] |= 1ull << Y; else a[X] &= ~(1ull << Y);
return *this;
}
void reset() { memset(a, 0, sizeof(uint) * size); }
int find(int x) { int X = x >> W, Y = x & mask; return (a[X] >> Y) & 1ull; }
//void flip(){*this=~*this;}
void flip(int x) { a[x >> W] ^= 1ull << (x & mask); }
int popcount(uint x)const {
x -= (x & 0xaaaaaaaaaaaaaaaaull) >> 1;
x = ((x & 0xccccccccccccccccull) >> 2) + (x & 0x3333333333333333ull);
x = ((x >> 4) + x) & 0x0f0f0f0f0f0f0f0full;
return (x * 0x0101010101010101ull) >> 56;
}
int count() {
int res = 0;
correction();
for (int i = 0; i < size; ++i)res += __popcnt(a[i]); //popcount // __builtin_popcountll is only available on GCC apparently. Substituted with library version.
return res;
}
int clz() {
correction();
int res = 0;
if (a[size - 1])res = __builtin_clzll(a[size - 1]) - (mask + 1 - (S & mask));
else {
res += S & mask;
for (int i = size - 2; i >= 0; --i)
if (a[i]) { res += __builtin_clzll(a[i]); break; }
else res += mask + 1;
}
return res;
}
int ctz() {
correction();
int res = 0;
for (int i = 0; i < size; ++i)
if (a[i]) { res += __builtin_ctzll(a[i]); break; }
else res += mask + 1;
return min(res, S);
}
int ffs() {
int res = ctz() + 1;
if (res == S + 1)res = 0;
return res;
}
uint to_uint() {
correction();
return a[0];
}
void print() {
for (int i = 0; i < size; ++i)
for (int j = 0; j <= mask && (i << W) + j + 1 <= S; ++j)printf("%I64d", (a[i] >> j) & 1ull);
printf("\n");
}
void correction() { if (S & mask)a[size - 1] &= (1ull << (S & mask)) - 1; }
BitSet<S>& flip() {
//for (uint *start=a,*end=a+size;start!=end;*start=~*start,++start);
uint* p0 = a, * p1 = p0 + 1, * p2 = p0 + 2, * p3 = p0 + 3, * pend = a + ((size >> 2) << 2);
while (p0 != pend) {
*p0 = ~*p0; p0 += 4;
*p1 = ~*p1; p1 += 4;
*p2 = ~*p2; p2 += 4;
*p3 = ~*p3; p3 += 4;
}
for (int i = 0; i < (size & 3); ++i, ++p0)*p0 = ~*p0;
return *this;
}
//operators
int operator [](int x) { return find(x); }
BitSet& operator =(const BitSet& y) {
memcpy(a, y.a, sizeof(uint) * size);
return *this;
}
BitSet<S>& operator =(const char* s) {
memset(a, 0, sizeof(uint) * size);
for (int i = 0; i < S; ++i) {
if (s[i] != '0' && s[i] != '1')break;
int X = i >> W, Y = i & mask;
if (s[i] == '1')a[X] |= 1ull << Y;
}
return *this;
}
BitSet<S>& operator =(const int* s) {
memset(a, 0, sizeof(uint) * size);
for (int i = 0; i < S; ++i) {
if (s[i] != 0 && s[i] != 1)break;
int X = i >> W, Y = i & mask;
if (s[i] == 1)a[X] |= 1ull << Y;
}
return *this;
}
BitSet<S> operator |(const BitSet<S>& y)const { return BitSet<S>(*this) |= y; }
BitSet<S> operator &(const BitSet<S>& y)const { return BitSet<S>(*this) &= y; }
BitSet<S> operator ^(const BitSet<S>& y)const { return BitSet<S>(*this) ^= y; }
BitSet<S> operator +(const BitSet<S>& y)const { return BitSet<S>(*this) += y; }
BitSet<S> operator -(const BitSet<S>& y)const { return BitSet<S>(*this) -= y; }
BitSet<S> operator <<(int x)const { return BitSet<S>(*this) <<= x; }
BitSet<S> operator >>(int x)const { return BitSet<S>(*this) >>= x; }
BitSet<S> operator ~()const { return BitSet<S>(*this).flip(); }
BitSet<S>& operator <<=(int x) {
int shift = x >> W; int delta = x & mask, delta1 = mask + 1 - delta;
if (!x)return *this;
if (delta == 0)for (uint* p = a + size - 1, *q = p - shift, *end = a + shift - 1; p != end; --p, --q)*p = *q;
else {
for (uint* p = a + size - 1, *q1 = p - shift, *q2 = p - shift - 1, *end = a + shift; p != end; --p, --q1, --q2)*p = (*q1 << delta) | (*q2 >> delta1);
a[shift] = a[0] << delta;
}
memset(a, 0, sizeof(uint) * shift); //for (uint *p=a,*end=a+shift;p!=end;++p)*p=0; //if this is a left shift... why is it setting the beginning of the "a" array to 0???... unless the damn array is "backwards". Sheesh.
//wait... this is a right shift according to https://orthallelous.wordpress.com/2019/10/24/magic-numbers-encoding-truth-tables-into-giant-single-values/ ... fuck.
return *this;
}
BitSet<S>& operator >>=(int x) {
int shift = x >> W; int delta = x & mask, delta1 = mask + 1 - delta;
if (!x)return *this;
correction();
if (delta == 0)for (uint* p = a, *q = p + shift, *end = a + size - shift; p != end; ++p, ++q)*p = *q;
else {
for (uint* p = a, *q1 = p + shift, *q2 = p + shift + 1, *end = a + size - shift - 1; p != end; ++p, ++q1, ++q2)*p = (*q1 >> delta) | (*q2 << delta1);
a[size - shift - 1] = a[size - 1] >> delta;
}
memset(a + size - shift, 0, sizeof(uint) * shift);
return *this;
}
BitSet<S>& operator |=(const BitSet<S>& y) {
uint* startA = a; const uint* startB = y.a, * endA = a + size;
while (startA != endA) { *startA |= *startB; ++startA; ++startB; }
//for (int i=0;i<size;++i)a[i]|=y.a[i];
return *this;
}
/*BitSet<S>& operator |=(const BitSet<S> &y){
uint *p0=a,*p1=p0+1,*p2=p0+2,*p3=p0+3;const uint *q0=y.a,*q1=q0+1,*q2=q0+2,*q3=q0+3,*pend=a+((size>>2)<<2);
while (p0!=pend){
*p0|=*q0; p0+=4; q0+=4;
*p1|=*q1; p1+=4; q1+=4;
*p2|=*q2; p2+=4; q2+=4;
*p3|=*q3; p3+=4; q3+=4;
}
for (int i=0;i<(size&3);++i)*p0++|=*q0++;
return *this;
}*/
BitSet<S>& operator &=(const BitSet<S>& y) {
uint* startA = a; const uint* startB = y.a, * endA = a + size;
while (startA != endA) { *startA &= *startB; ++startA; ++startB; }
return *this;
}
BitSet<S>& operator ^=(const BitSet<S>& y) {
uint* startA = a; const uint* startB = y.a, * endA = a + size;
while (startA != endA) { *startA ^= *startB; ++startA; ++startB; }
return *this;
}
BitSet<S>& operator +=(const BitSet<S>& y) {
uint t = 0, * p = a, * end = a + size; const uint* q = y.a;
while (p != end) {
uint p1 = *p; *p = p1 + *q + t;
t = (*p < p1) || (p1 + t < t);
++p; ++q;
}
return *this;
}
BitSet<S>& operator -=(const BitSet<S>& y) {
uint t = 0, * p = a, * end = a + size; const uint* q = y.a;
while (p != end) {
uint p1 = *p; *p = p1 - *q - t;
t = (*p > p1) || (p1 + t < t);
++p; ++q;
}
return *this;
}
operator bool() { return count() > 0; }
#undef mask
#undef W
#undef get_size
};
LeetCode516.cpp
// LeetCode516.cpp : This file contains the 'main' function. Program execution begins and ends there.
//
#include <iostream>
#include "BitSet.h"
#include <bitset>
// If parameter is not true, test fails
// This check function would be provided by the test framework
#define IS_TRUE(x) { if (!x) std::cout << __FUNCTION__ << " failed on line " << __LINE__ << std::endl; else std::cout << __FUNCTION__ << " passed" << std:: endl;}
int longestPalindromeSubseq(std::string s) {
int a[N] = { 0 }, b[N] = { 0 }; //2 integer arrays - initialzing to 0 isn't necessary, but seems to make the debug output more readable?
BitSet<N> row[2], X, Y; //3 bitsets... one of them is actually 2 (lol)
std::unordered_map<int, std::vector<int>> S; //This map tracks the number of occurrences of each letter?
std::unordered_map<int, BitSet<N>> match;
int n = s.size(), m = n;
S.clear();
match.clear();
//row[1].reset(); //WTF is this? Garbage.
//For each piece of the string, cast it from a char to an int and shove it in the first array, then push back a copy of each array into the UOMap S.
for (int i = 0; i < n; ++i) {
a[i] = static_cast<int>(s[i]); //changed C-style cast to static_cast
S[a[i]].push_back(i);
}
//Set the second array to a backwards copy of A I guess? (while converting the characters to integers ofc)
for (int i = 0; i < m; ++i) {
b[i] = int(s[n - 1 - i]);
}
for (int i = 0; i < m; ++i) {
//so if it's the first loop iteration or the find (working on the "backwards" array) matches the end, do this:
if (match.find(b[i]) == match.end()) { //how does this even run the first time? Match is getting cleared and never set? I guess it's working because "find(b[i])" and "end()" are both 0? - Kind of - turns out that if the find function fails, it returns "end"...
//Insert a new bitset with a "key" of "b[i]" into the match map.
auto x = match.insert(std::make_pair(b[i], BitSet<N>())).first; // replaced std::unordered_map<int, BitSet<N> >::iterator with auto... that's nice.
//Then using the iterator we just created above, loop through all entries in UOMap S under that key and ???
//Since we found it in "match", lets check for it(?) in UOMap S... and then set some bit in match based on how many entries there are in S?
for (auto j = S[b[i]].begin(); j != S[b[i]].end(); ++j) {
x->second.set(*j, 1); //this is setting some bit value in "match" to 1 I guess.
}
}
}
for (int i = 0, now = 0; i < m; ++i, now ^= 1) { //Is "*=" being intentionally obfuscated here to "^=" or is it actually different?? Ugh... nasty.
X = (row[now ^ 1] | match[b[i]]).set(n, 1); // if the caret is supposed to represent XOR, why does this still work whenever I replace it with a *... very funky.
row[now] = (X & ((X - (row[now ^ 1] << 1).set(0, 1)) ^ X)).set(n, 0);
}
return row[(m - 1) & 1].count();
}
template<int S>
std::bitset<S>& operator-(const std::bitset<S>&y) { return std::bitset<S>(*this) -= y; }
#define W 6
#define mask 63
#define get_size(n) ((n)<1?0:((n)+mask)>>W)
template<size_t S>
std::bitset<S>& operator-(std::bitset<S>& y, const std::bitset<S>& z) {
//return std::bitset<S>(*this) -= y;
size_t thing[get_size(S)];
size_t t = 0, * p = thing, * end = thing + get_size(y);
const size_t* q = z[0]; //z.a
while (p != end) {
size_t p1 = *p;
*p = p1 - *q - t;
t = (*p > p1) || (p1 + t < t);
++p;
++q;
}
return y;
}
template<size_t S>
std::bitset<S>& operator+(std::bitset<S>& y, const std::bitset<S>& z) {
size_t thing[get_size(S)];
size_t t = 0, * p = thing, * end = thing + get_size(y);
const size_t* q = z[0];
while (p != end) {
size_t p1 = *p; *p = p1 + *q + t;
t = (*p < p1) || (p1 + t < t);
++p;
++q;
}
return y;
}
template<size_t S>
std::bitset<S>& operator -=(const std::bitset<S>& y) {
uint t = 0, * p = a, * end = a + size;
const uint* q = y.a;
while (p != end) {
uint p1 = *p; *p = p1 - *q - t;
t = (*p > p1) || (p1 + t < t);
++p; ++q;
}
return *this;
}
int LPS_STL(std::string s) {
const int M = 1005;
int c[M], d[M];
std::bitset<M> row[2], X, Y;
std::unordered_map<int, std::vector<int> > S;
std::unordered_map<int, std::bitset<M> > match;
int n = s.size(), m = n;
S.clear();
match.clear();
row[1].reset();
for (int i = 0; i < n; ++i)c[i] = int(s[i]), S[c[i]].push_back(i);
for (int i = 0; i < m; ++i)d[i] = int(s[n - 1 - i]);
for (int i = 0; i < m; ++i)if (match.find(d[i]) == match.end()) {
std::unordered_map<int, std::bitset<M> >::iterator x = match.insert(std::make_pair(d[i], std::bitset<M>())).first;
for (std::vector<int>::iterator j = S[d[i]].begin(); j != S[d[i]].end(); ++j)x->second.set(*j, 1);
}
for (int i = 0, now = 0; i < m; ++i, now ^= 1)
X = (row[now ^ 1] | match[d[i]]).set(n, 1), row[now] = (X & (( X - (row[now ^ 1] << 1).set(0, 1)) ^ X)).set(n, 0);
return row[(m - 1) & 1].count();
}
void test1() {
int test = longestPalindromeSubseq("");
IS_TRUE(test == 0);
}
void test2() {
int test = longestPalindromeSubseq("bbbab");
IS_TRUE(test == 4);
}
void test3() {
int test = longestPalindromeSubseq("cbbd");
IS_TRUE(test == 2);
}
void test4() {
int test = longestPalindromeSubseq("cbbd");
IS_TRUE(test == 0); //purposefully fail
}
void test5() {
int test = longestPalindromeSubseq("cacbcbba");
IS_TRUE(test == 5);
}
void test6() {
int test = longestPalindromeSubseq("eeeecdeabfbeeb");
IS_TRUE(test == 7);
}
//Std::bitset tests
//void test7() {
// int test = LPS_STL("");
// IS_TRUE(test == 0);
//}
//void test8() {
// int test = LPS_STL("bbbab")
// IS_TRUE(test == 4);
//}
int main()
{
std::cout << "Longest Palindrome Subsequence\n";
test1();
test2();
test3();
test4();
test5();
test6();
test7();
test8();
}
Main question: Is there a core reason this person chose to implement a custom bitset instead of the std library?
Extraneous questions:
Is some of this code intentionally obfuscated?
It feels a bit naive to ask this, but do some of you intentionally code this way?
Why are they using the value of N (an int) as the type for their bitsets?
Some of their formulas are incredibly long.
Attempting to implement the necessary operators for the std::bitset implementation didn't go well. Can someone elucidate the reason these don't exist? I feel like there's a good reason for it regarding expected output, but I can't articulate it.
Is this question more appropriate for another SO site, such as Code Review?
Is some of this code intentionally obfuscated?
It is just faster to type
#define W 6
than
constexpr size_t bitcoverage { std::bitwidth(sizeof(int64_t)*CHAR_BIT)-1 }; // bits needed to represent 8*8=64 2^7 so 7-1=6 which 0-63 bits.
(if I got it right ...)
It feels a bit naive to ask this, but do some of you intentionally code this way?
When I started programming I did, now I have realized that code is read more than written and it could confuse people, including me, who read it later.
Why are they using the value of N (an int) as the type for their bitsets?
N is just how many entries there is max, see the link you provided.
It actually uses an int64_t to store the bits. Which for many operaton on most larger CPU's is far more efficient than the same doing the same operation on an byte as you can do or/and/etc on a lot of bits with just one instuction.
Some of their formulas are incredibly long.
I am not sure which your are speculating about.
Attempting to implement the necessary operators for the std::bitset implementation didn't go well. Can someone elucidate the reason these don't exist? I feel like there's a good reason for it regarding expected output, but I can't articulate it.
Again, what exact are you referring to?

Scaling png font down

Is there a way to scale down with highest quality a font which is png image in opengl at startup? I tried gluScaleImage but there are many artefacts. Is there anything that uses lanczos or something like that? I don't want to write a shader or anything that does the scaling runtime.
This is based on an algorithm, I copied decades ago from the German c't Magazin, and still use it from time to time for similar issues like described by OP.
bool scaleDown(
const Image &imgSrc,
Image &imgDst,
int w, int h,
int align)
{
const int wSrc = imgSrc.w(), hSrc = imgSrc.h();
assert(w > 0 && w <= wSrc && h > 0 && h <= hSrc);
// compute scaling factors
const double sx = (double)wSrc / (double)w;
const double sy = (double)hSrc / (double)h;
const double sxy = sx * sy;
// prepare destination image
imgDst.resize(w, h, (w * 3 + align - 1) / align * align);
// cache some data
const uint8 *const dataSrc = imgSrc.data();
const int bPRSrc = imgSrc.bPR();
// perform scaling
for (int y = 0; y < h; ++y) {
const double yStart = sy * y;
const double yEnd = std::min(sy * (y + 1), (double)hSrc);
const int yStartInt = (int)yStart;
const int yEndInt = (int)yEnd - (yEndInt == yEnd);
const double tFrm = 1 + yStartInt - yStart, bFrm = yEnd - yEndInt;
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc);
const int xStartInt = (int)xStart;
const int xEndInt = (int)xEnd - (xEndInt == xEnd);
double lFrm = 1 + xStartInt - xStart, rFrm = xEnd - xEndInt;
double pixel[3] = { 0.0, 0.0, 0.0 }; // values of target pixel
for (int i = yStartInt; i <= yEndInt; ++i) {
int jData = i * bPRSrc + xStartInt * 3;
for (int j = xStartInt; j <= xEndInt; ++j) {
double pixelAdd[3];
for (int k = 0; k < 3; ++k) {
pixelAdd[k] = (double)dataSrc[jData++] / sxy;
}
if (j == xStartInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= lFrm;
} else if (j == xEndInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= rFrm;
}
if (i == yStartInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= tFrm;
} else if (i == yEndInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= bFrm;
}
for (int k = 0; k < 3; ++k) pixel[k] += pixelAdd[k];
}
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
}
// done
return true;
}
If I got it right, this implements a bilinear interpolation.
I don't dare to call it a Minimal Complete Verifiable Example although this is what I intended to do.
The complete sample application:
A simplified class Image
image.h:
#ifndef IMAGE_H
#define IMAGE_H
#include <vector>
// convenience type for bytes
typedef unsigned char uint8;
// image helper class
class Image {
private: // variables:
int _w, _h; // image size
size_t _bPR; // bytes per row
std::vector<uint8> _data; // image data
public: // methods:
// constructor.
Image(): _w(0), _h(0), _bPR(0) { }
// destructor.
~Image() = default;
// copy constructor.
Image(const Image&) = delete; // = default; would work as well.
// copy assignment.
Image& operator=(const Image&) = delete; // = default; would work as well.
// returns width of image.
int w() const { return _w; }
// returns height of image.
int h() const { return _h; }
// returns bytes per row.
size_t bPR() const { return _bPR; }
// returns pointer to image data.
const uint8* data(
int y = 0) // row number
const {
return &_data[y * _bPR];
}
// returns data size (in bytes).
size_t size() const { return _data.size(); }
// clears image.
void clear();
// resizes image.
uint8* resize( // returns allocated buffer
int w, // image width
int h, // image height
int bPR); // bytes per row
// returns pixel.
int getPixel(
int x, // column
int y) // row
const;
// sets pixel.
void setPixel(
int x, // column
int y, // row
uint8 r, uint8 g, uint8 b);
// sets pixel.
void setPixel(
int x, // column
int y, // row
int value) // RGB value
{
setPixel(x, y, value & 0xff, value >> 8 & 0xff, value >> 16 & 0xff);
}
};
// helper functions:
inline uint8 getR(int value) { return value & 0xff; }
inline uint8 getG(int value) { return value >> 8 & 0xff; }
inline uint8 getB(int value) { return value >> 16 & 0xff; }
#endif // IMAGE_H
image.cc:
#include <cassert>
#include "image.h"
// clears image.
void Image::clear()
{
_data.clear(); _w = _h = _bPR = 0;
}
// allocates image data.
uint8* Image::resize( // returns allocated buffer
int w, // image width
int h, // image height
int bPR) // bits per row
{
assert(w >= 0 && 3 * w <= bPR);
assert(h >= 0);
_w = w; _h = h; _bPR = bPR;
const size_t size = h * bPR;
_data.resize(size);
return _data.data();
}
// returns pixel.
int Image::getPixel(
int x, // column
int y) // row
const {
assert(x >= 0 && x < _w);
assert(y >= 0 && y < _h);
const size_t offs = y * _bPR + 3 * x;
return _data[offs + 0]
| _data[offs + 1] << 8
| _data[offs + 2] << 16;
}
// sets pixel.
void Image::setPixel(
int x, // column
int y, // row
uint8 r, uint8 g, uint8 b) // R, G, B values
{
assert(x >= 0 && x < _w);
assert(y >= 0 && y < _h);
const size_t offs = y * _bPR + 3 * x;
_data[offs + 0] = r;
_data[offs + 1] = g;
_data[offs + 2] = b;
}
Image Scaling
imageScale.h:
#ifndef IMAGE_SCALE_H
#define IMAGE_SCALE_H
#include "image.h"
/* scales an image to a certain width and height.
*
* Note:
* imgSrc and imgDst may not be identical.
*/
bool scaleTo( // returns true if successful
const Image &imgSrc, // source image
Image &imgDst, // destination image
int w, int h, // destination width and height
int align = 4); // row alignment
/* scales an image about a certain horizontal/vertical scaling factor.
*
* Note:
* imgSrc and imgDst may not be identical.
*/
inline bool scaleXY( // returns true if successful
const Image &imgSrc, // source image
Image &imgDst, // destination image
double sX, // horizontal scaling factor (must be > 0 but not too large)
double sY, // vertical scaling factor (must be > 0 but not too large)
int align = 4) // row alignment
{
return sX > 0.0 && sY > 0.0
? scaleTo(imgSrc, imgDst,
(int)(sX * imgSrc.w()), (int)(sY * imgSrc.h()), align)
: false;
}
/* scales an image about a certain scaling factor.
*
* Note:
* imgSrc and imgDst may not be identical.
*/
inline bool scale( // returns true if successful
const Image &imgSrc, // source image
Image &imgDst, // destination image
double s, // scaling factor (must be > 0 but not too large)
int align = 4) // row alignment
{
return scaleXY(imgSrc, imgDst, s, s, align);
}
#endif // IMAGE_SCALE_H
imageScale.cc:
#include <cassert>
#include <algorithm>
#include "imageScale.h"
namespace {
template <typename VALUE>
VALUE clip(VALUE value, VALUE min, VALUE max)
{
return value < min ? min : value > max ? max : value;
}
bool scaleDown(
const Image &imgSrc,
Image &imgDst,
int w, int h,
int align)
{
const int wSrc = imgSrc.w(), hSrc = imgSrc.h();
assert(w > 0 && w <= wSrc && h > 0 && h <= hSrc);
// compute scaling factors
const double sx = (double)wSrc / (double)w;
const double sy = (double)hSrc / (double)h;
const double sxy = sx * sy;
// prepare destination image
imgDst.resize(w, h, (w * 3 + align - 1) / align * align);
// cache some data
const uint8 *const dataSrc = imgSrc.data();
const int bPRSrc = imgSrc.bPR();
// perform scaling
for (int y = 0; y < h; ++y) {
const double yStart = sy * y;
const double yEnd = std::min(sy * (y + 1), (double)hSrc);
const int yStartInt = (int)yStart;
const int yEndInt = (int)yEnd - (yEndInt == yEnd);
const double tFrm = 1 + yStartInt - yStart, bFrm = yEnd - yEndInt;
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc);
const int xStartInt = (int)xStart;
const int xEndInt = (int)xEnd - (xEndInt == xEnd);
double lFrm = 1 + xStartInt - xStart, rFrm = xEnd - xEndInt;
double pixel[3] = { 0.0, 0.0, 0.0 }; // values of target pixel
for (int i = yStartInt; i <= yEndInt; ++i) {
int jData = i * bPRSrc + xStartInt * 3;
for (int j = xStartInt; j <= xEndInt; ++j) {
double pixelAdd[3];
for (int k = 0; k < 3; ++k) {
pixelAdd[k] = (double)dataSrc[jData++] / sxy;
}
if (j == xStartInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= lFrm;
} else if (j == xEndInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= rFrm;
}
if (i == yStartInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= tFrm;
} else if (i == yEndInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= bFrm;
}
for (int k = 0; k < 3; ++k) pixel[k] += pixelAdd[k];
}
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
}
// done
return true;
}
bool scaleUp(
const Image &imgSrc,
Image &imgDst,
int w, int h,
int align)
{
const int wSrc = imgSrc.w(), hSrc = imgSrc.h();
assert(w && w >= wSrc && h && h >= hSrc);
// compute scaling factors
const double sx = (double)wSrc / (double)w;
const double sy = (double)hSrc / (double)h;
// prepare destination image
imgDst.resize(w, h, (w * 3 + align - 1) / align * align);
// cache some data
const uint8 *const dataSrc = imgSrc.data();
const int bPRSrc = imgSrc.bPR();
// perform scaling
for (int y = 0; y < h; ++y) {
const double yStart = sy * y;
const double yEnd = std::min(sy * (y + 1), (double)hSrc - 1);
const int yStartInt = (int)yStart;
const int yEndInt = (int)yEnd;
if (yStartInt < yEndInt) {
const double bFract = clip((double)((yEnd - yEndInt) / sy), 0.0, 1.0);
const double tFract = 1.0 - bFract;
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc - 1);
const int xStartInt = (int)xStart, xEndInt = (int)xEnd;
double pixel[4];
if (xStartInt < xEndInt) {
const double rFract
= clip((double)((xEnd - xEndInt) / sx), 0.0, 1.0);
const double lFract = 1.0 - rFract;
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] = tFract * lFract * dataSrc[jData++];
}
for (int k = 0; k < 3; ++k) {
pixel[k] += tFract * rFract * dataSrc[jData++];
}
jData = yEndInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] += bFract * lFract *dataSrc[jData++];
}
for (int k = 0; k < 3; ++k) {
pixel[k] += bFract * rFract *dataSrc[jData++];
}
} else {
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] = tFract * dataSrc[jData++];
}
jData = yEndInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] += bFract * dataSrc[jData++];
}
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
} else {
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc - 1);
const int xStartInt = (int)xStart, xEndInt = (int)xEnd;
double pixel[3];
if (xStartInt < xEndInt) {
const double rFract
= clip((double)((xEnd - xEndInt) / sx), 0.0, 1.0);
const double lFract = 1.0 - rFract;
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] = lFract * dataSrc[jData++];
}
for (int k = 0; k < 3; ++k) {
pixel[k] += rFract * dataSrc[jData++];
}
} else {
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) pixel[k] = dataSrc[jData++];
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
}
}
// done
return true;
}
} // namespace
bool scaleTo(const Image &imgSrc, Image &imgDst, int w, int h, int align)
{
Image imgTmp;
return w <= 0 || h <= 0 ? false
: w >= imgSrc.w() && h >= imgSrc.h()
? scaleUp(imgSrc, imgDst, w, h, align)
: w <= imgSrc.w() && h <= imgSrc.h()
? scaleDown(imgSrc, imgDst, w, h, align)
: w >= imgSrc.w()
? scaleUp(imgSrc, imgTmp, w, imgSrc.h(), 1)
&& scaleDown(imgTmp, imgDst, w, h, align)
: scaleDown(imgSrc, imgTmp, w, imgSrc.h(), 1)
&& scaleUp(imgTmp, imgDst, w, h, align);
}
PPM file IO
imagePPM.h:
#ifndef IMAGE_PPM_H
#define IMAGE_PPM_H
#include <iostream>
#include "image.h"
// reads a binary PPM file.
bool readPPM( // returns true if successful
std::istream &in, // input stream (must be opened with std::ios::binary)
Image &img, // image to read into
int align = 4); // row alignment
// writes binary PPM file.
bool writePPM( // returns true if successful
std::ostream &out, // output stream (must be opened with std::ios::binary)
const Image &img); // image to write from
#endif // IMAGE_PPM_H
imagePPM.cc:
#include <sstream>
#include <string>
#include "imagePPM.h"
// reads a binary PPM file.
bool readPPM( // returns true if successful
std::istream &in, // input stream (must be opened with std::ios::binary)
Image &img, // image to read into
int align) // row alignment
{
// parse header
std::string buffer;
if (!getline(in, buffer)) return false;
if (buffer != "P6") {
std::cerr << "Wrong header! 'P6' expected.\n";
return false;
}
int w = 0, h = 0, t = 0;
for (int i = 0; i < 3;) {
if (!getline(in, buffer)) return false;
if (buffer.empty()) continue; // skip empty lines
if (buffer[0] == '#') continue; // skip comments
std::istringstream str(buffer);
switch (i) {
case 0:
if (!(str >> w)) continue;
++i;
case 1:
if (!(str >> h)) continue;
++i;
case 2:
if (!(str >> t)) continue;
++i;
}
}
if (t != 255) {
std::cerr << "Unsupported format! t = 255 expected.\n";
return false;
}
// allocate image buffer
uint8 *data = img.resize(w, h, (w * 3 + align - 1) / align * align);
// read data
for (int i = 0; i < h; ++i) {
if (!in.read((char*)data, 3 * img.w())) return false;
data += img.bPR();
}
// done
return true;
}
// writes binary PPM file.
bool writePPM( // returns true if successful
std::ostream &out, // output stream (must be opened with std::ios::binary)
const Image &img) // image to write from
{
// write header
if (!(out << "P6\n" << img.w() << ' ' << img.h() << " 255\n")) return false;
// write image data
for (size_t y = 0; y < img.h(); ++y) {
const uint8 *const data = img.data(y);
if (!out.write((const char*)data, 3 * img.w())) return false;
}
// done
return true;
}
The main application
scaleRGBImg.cc:
#include <iostream>
#include <fstream>
#include <string>
#include "image.h"
#include "imagePPM.h"
#include "imageScale.h"
int main(int argc, char **argv)
{
// read command line arguments
if (argc <= 3) {
std::cerr << "Missing arguments!\n";
std::cout
<< "Usage:\n"
<< " scaleRGBImg IN_FILE SCALE OUT_FILE\n";
return 1;
}
const std::string inFile = argv[1];
char *end;
const double s = std::strtod(argv[2], &end);
if (end == argv[2] || *end != '\0') {
std::cerr << "Invalid scale factor '" << argv[2] << "'!\n";
return 1;
}
if (s <= 0.0) {
std::cerr << "Invalid scale factor " << s << "!\n";
return 1;
}
const std::string outFile = argv[3];
// read image
Image imgSrc;
{ std::ifstream fIn(inFile.c_str(), std::ios::binary);
if (!readPPM(fIn, imgSrc)) {
std::cerr << "Reading '" << inFile << "' failed!\n";
return 1;
}
}
// scale image
Image imgDst;
if (!scale(imgSrc, imgDst, s)) {
std::cerr << "Scaling failed!\n";
return 1;
}
// write image
{ std::ofstream fOut(outFile.c_str(), std::ios::binary);
if (!writePPM(fOut, imgDst) || (fOut.close(), !fOut.good())) {
std::cerr << "Writing '" << outFile << "' failed!\n";
return 1;
}
}
// done
return 0;
}
Test
Compiled in cygwin64:
$ g++ -std=c++11 -o scaleRGBImg scaleRGBImg.cc image.cc imagePPM.cc imageScale.cc
$
A sample image test.ppm for a test – converted to PPM in GIMP:
Test with the sample image:
$ for I in 0.8 0.6 0.4 0.2 ; do echo ./scaleRGBImg test.ppm $I test.$I.ppm ; done
./scaleRGBImg test.ppm 0.8 test.0.8.ppm
./scaleRGBImg test.ppm 0.6 test.0.6.ppm
./scaleRGBImg test.ppm 0.4 test.0.4.ppm
./scaleRGBImg test.ppm 0.2 test.0.2.ppm
$ for I in 0.8 0.6 0.4 0.2 ; do ./scaleRGBImg test.ppm $I test.$I.ppm ; done
$
This is what came out:
test.0.8.ppm:
test.0.6.ppm:
test.0.4.ppm:
test.0.2.ppm:

Solving knight's tour problem on a huge board?

I have found this code that solves the Knight's Tour problem.
If I, for example, want to solve a board of size 800x800 I get the following error:
Exception thrown at 0x00007FF6345D3778 in test.exe: 0xC00000FD: Stack overflow (parameters: 0x0000000000000001, 0x00000082140C3000).
Unhandled exception at 0x00007FF6345D3778 in test.exe: 0xC00000FD: Stack overflow (parameters: 0x0000000000000001, 0x00000082140C3000).
How can I avoid this error? How should I change the Board class such that it can solve a board this big?
I want to be able to write: Board<800> b6 for example.
PS. This code works for small boards.
Thank you very much.
class Board
{
public:
array<pair<int, int>, 8> moves;
array<array<int, N>, N> data;
Board()
{
moves[0] = make_pair(2, 1);
moves[1] = make_pair(1, 2);
moves[2] = make_pair(-1, 2);
moves[3] = make_pair(-2, 1);
moves[4] = make_pair(-2, -1);
moves[5] = make_pair(-1, -2);
moves[6] = make_pair(1, -2);
moves[7] = make_pair(2, -1);
}
array<int, 8> sortMoves(int x, int y) const
{
array<tuple<int, int>, 8> counts;
for (int i = 0; i < 8; ++i)
{
int dx = get<0>(moves[i]);
int dy = get<1>(moves[i]);
int c = 0;
for (int j = 0; j < 8; ++j)
{
int x2 = x + dx + get<0>(moves[j]);
int y2 = y + dy + get<1>(moves[j]);
if (x2 < 0 || x2 >= N || y2 < 0 || y2 >= N)
continue;
if (data[y2][x2] != 0)
continue;
c++;
}
counts[i] = make_tuple(c, i);
}
sort(counts.begin(), counts.end());
array<int, 8> out;
for (int i = 0; i < 8; ++i)
out[i] = get<1>(counts[i]);
return out;
}
void solve(string start)
{
for (int v = 0; v < N; ++v)
for (int u = 0; u < N; ++u)
data[v][u] = 0;
int x0 = start[0] - 'a';
int y0 = N - (start[1] - '0');
data[y0][x0] = 1;
array<tuple<int, int, int, array<int, 8>>, N*N> order;
order[0] = make_tuple(x0, y0, 0, sortMoves(x0, y0));
int n = 0;
while (n < N*N - 1)
{
int x = get<0>(order[n]);
int y = get<1>(order[n]);
bool ok = false;
for (int i = get<2>(order[n]); i < 8; ++i)
{
int dx = moves[get<3>(order[n])[i]].first;
int dy = moves[get<3>(order[n])[i]].second;
if (x + dx < 0 || x + dx >= N || y + dy < 0 || y + dy >= N)
continue;
if (data[y + dy][x + dx] != 0)
continue;
++n;
get<2>(order[n]) = i + 1;
data[y + dy][x + dx] = n + 1;
order[n] = make_tuple(x + dx, y + dy, 0, sortMoves(x + dx, y + dy));
ok = true;
break;
}
if (!ok) // Failed. Backtrack.
{
data[y][x] = 0;
--n;
}
}
}
template<int N>
friend ostream& operator<<(ostream &out, const Board<N> &b);
};
template<int N>
ostream& operator<<(ostream &out, const Board<N> &b)
{
for (int v = 0; v < N; ++v)
{
for (int u = 0; u < N; ++u)
{
if (u != 0) out << ",";
out << setw(3) << b.data[v][u];
}
out << endl;
}
return out;
}
int main{
Board<800> b2;
b2.solve("b5");
cout << b2 << endl;
return 0
}
array<array<int, N>, N> data with N being 800 requires around 2.5 MB of memory.
Board<800> b2 is allocated on the stack.
Depending on the platform the default stack size is around 2-8MB. It looks like you are on windows where the stack size is usually 2MB. As your array is larger than the size of the stack you get a stack overflow.
You need to allocate Board on the heap. e.g.:
int main{
auto b2 = std::make_unique<Board<800>>();
b2->solve("b5");
cout << *b2 << endl;
return 0
}
In the solve function you are also allocating order on the stack. This should be changed to something like this in order to allocate it on the heap:
auto orderPointer = std::make_unique<array<tuple<int, int, int, array<int, 8>>, N*N>>();
// dereference the pointer to make array indexes easier
auto& order = *orderPointer;

Time comparing on debug mode and release mode(Visual studio 2008)

I implemented A* and JPS(Jump Point Search) using VS2008.
Then i tried comparing time cost of these code.
On debug mode, (my) JPS is faster than A* about 2.0~50 times .
But on release mode, JPS is faster than A* about 0.6~3.0 times.
Especially, almost cases of test on release mode, JPS slower than A*.
Why results are so different?
In the paper( "Online Graph Pruning for Pathfinding on Grid Maps", 2011 ),
JPS is faster than A* about 20~30 times.
If i want to get a similar results in the paper, what should i do?
I just call map1.A_star() and map2.JPS() in main.cpp.
and I used prioiry_queue(STL) for A* and JPS.
↓ pathfinding.cpp
#include "util.h"
using namespace std;
int DIR_X[8] = { 0, 1, 1, 1, 0, -1, -1, -1 };
int DIR_Y[8] = { -1, -1, 0, 1, 1, 1, 0, -1 };
// diagonal index {1, 3, 5, 7}
template<class T>
void vector_clear(vector<T>& vecObj)
{
vector<T> tempObj;
tempObj.swap(vecObj);
}
bool operator<(const Node& a, const Node& b)
{
return a.getPriority() > b.getPriority();
}
void read_scenario(char* path, char(*scenarios)[256], int& total) {
ifstream scen_file(path);
char buffer[256];
int num = 0;
scen_file.getline(buffer, 256);
while (!scen_file.eof()) {
scen_file.getline(buffer, 256);
num++;
int index1;
int index2;
int count = 0;
for (int i = 0; i<256; i++) {
if (buffer[i] == ' ')
count++;
if (buffer[i] == ' ' && count == 4)
index1 = i;
if (buffer[i] == ' ' && count == 8)
index2 = i;
}
for (int i = index1 + 1; i <= index2 - 1; i++) {
scenarios[num][i - (index1 + 1)] = buffer[i];
}
scenarios[num][index2] = NULL;
}
std::cout << num << " 개의 시나리오가 있습니다." << endl;
total = num;
scen_file.close();
}
Map::Map(int* START_GOAL, char* IN_PATH, char* OUT_PATH, string MODE) {
sx = START_GOAL[0];
sy = START_GOAL[1];
gx = START_GOAL[2];
gy = START_GOAL[3];
mode = MODE;
in_path = IN_PATH;
out_path = OUT_PATH;
ifstream map_file(in_path);
if (!map_file.is_open()) {
std::cout << "there is no map_file" << endl;
}
char buffer[128];
char ch[4];
char cw[4];
map_file.getline(buffer, 128);
map_file.getline(buffer, 128);
for (int i = 7; i < strlen(buffer); i++) {
ch[i - 7] = buffer[i];
}
h = atoi(ch);
std::cout << "height = " << h << endl;;
map_file.getline(buffer, 128);
for (int i = 6; i < strlen(buffer); i++) {
cw[i - 6] = buffer[i];
}
w = atoi(cw);
std::cout << "width = " << w << endl;;
map_file.getline(buffer, 128);
std::cout << "Start at (" << sx << " " << sy << ")" << endl;
std::cout << "Goal is (" << gx << " " << gy << ")" << endl;
std::cout << endl;
in_map = new char*[h];
direction_map = new int*[h];
visit_map = new bool*[h];
out_map = new char*[h];
parent_map = new int*[h];
open_node_map = new double*[h];
for (int j = 0; j<h; j++) {
in_map[j] = new char[w];
direction_map[j] = new int[w];
visit_map[j] = new bool[w];
out_map[j] = new char[w];
parent_map[j] = new int[w];
open_node_map[j] = new double[w];
for (int i = 0; i <= w; i++) {
char tmp;
map_file.get(tmp);
if (w == i)
continue;
in_map[j][i] = tmp;
direction_map[j][i] = -1;
visit_map[j][i] = false;
out_map[j][i] = tmp;
open_node_map[j][i] = 0.0;
}
}
map_file.close();
}
Map::~Map() {
for (int i = 0; i<h; i++) {
delete[] parent_map[i];
delete[] in_map[i];
delete[] direction_map[i];
delete[] visit_map[i];
delete[] out_map[i];
delete[] open_node_map[i];
}
delete[] parent_map;
delete[] in_map;
delete[] direction_map;
delete[] visit_map;
delete[] out_map;
delete[] open_node_map;
}
int Map::getGx() const { return gx; }
int Map::getGy() const { return gy; }
int Map::getSx() const { return sx; }
int Map::getSy() const { return sy; }
int Map::getHeight() const { return h; }
int Map::getWidth() const { return w; }
double Map::getOptimalLength() const { return optimal_length; }
char Map::getInMapData(int x, int y) { return in_map[y][x]; }
int Map::getDirectionData(int x, int y) { return direction_map[y][x]; }
bool Map::getVisitMapData(int x, int y) { return visit_map[y][x]; }
int Map::getParentMapData(int x, int y) { return parent_map[y][x]; }
double Map::getOpen_NodeData(int x, int y) { return open_node_map[y][x]; }
char Map::getOutMapData(int x, int y) const { return out_map[y][x]; }
void Map::setVisitMap(int x, int y, bool data) { visit_map[y][x] = data; }
void Map::setDirectionMap(int x, int y, int data) { direction_map[y][x] = data; }
void Map::setOutMap(int x, int y, char data) { out_map[y][x] = data; }
void Map::setParentMap(int x, int y, int data) { parent_map[y][x] = data; }
void Map::setOpen_NodeMap(int x, int y, double data) { open_node_map[y][x] = data; }
void Map::initialize() {}
void Map::draw_map() {
ofstream out_file(out_path);
for (int j = 0; j<h; j++) {
for (int i = 0; i<w; i++) {
if (j == sy && i == sx)
out_map[j][i] = 'S';
if (j == gy && i == gx)
out_map[j][i] = 'G';
out_file << out_map[j][i];
}
out_file << "\r\n";
}
out_file.close();
}
void Map::A_star() {
priority_queue<Node> search_q[2];
Node startPoint(sx, sy, gx, gy, -1, 0, mode);
int pqi = 0;
search_q[pqi].push(startPoint);
Map::setOpen_NodeMap(sx, sy, startPoint.getPriority());
while (!search_q[pqi].empty()) {
int cx = search_q[pqi].top().getX(); // current x, y
int cy = search_q[pqi].top().getY();
double passedLength_c = search_q[pqi].top().getPassedLength();
Map::setVisitMap(cx, cy, true);
Map::setOpen_NodeMap(cx, cy, search_q[pqi].top().getPriority());
search_q[pqi].pop();
if (cx == gx && cy == gy) {
double shortestLength = 0;
while (1) {
if ((cx == sx) && (cy == sy)) break;
int tmp_x, tmp_y, tmp_dir;
tmp_x = cx;
tmp_y = cy;
tmp_dir = getDirectionData(tmp_x, tmp_y);
cx -= DIR_X[tmp_dir];
cy -= DIR_Y[tmp_dir];
setOutMap(cx, cy, '#');
if (tmp_dir % 2 == 1)
shortestLength += sqrt(2.0);
else
shortestLength += 1.0;
}
cout << "A_star find!" << endl;
cout << "Path Length = " << shortestLength << endl;
optimal_length = shortestLength;
while (!search_q[pqi].empty()) {
search_q[pqi].pop();
}
return;
}
for (int dir = 0; dir<8; dir++) {
// next_node
int nx = cx + DIR_X[dir];
int ny = cy + DIR_Y[dir];
if (!(nx >(w - 1) || nx < 0 || ny >(h - 1) || ny < 0 || getInMapData(nx, ny) == '#' || getVisitMapData(nx, ny) == true)) {
Node next_node(nx, ny, gx, gy, passedLength_c, dir, mode, 1);
if (Map::getOpen_NodeData(nx, ny) == 0) {
Map::setOutMap(nx, ny, 'I');
Map::setOpen_NodeMap(nx, ny, next_node.getPriority());
search_q[pqi].push(next_node);
Map::setDirectionMap(nx, ny, dir);
}
else if (Map::getOpen_NodeData(nx, ny) > next_node.getPriority()) {
Map::setOpen_NodeMap(nx, ny, next_node.getPriority());
Map::setDirectionMap(nx, ny, dir);
search_q[pqi].push(next_node);
/*
while (!(search_q[pqi].top().getX() == nx && search_q[pqi].top().getY() == ny))
search_q[1 - pqi].push(search_q[pqi].top());
search_q[pqi].pop();
}
search_q[pqi].pop();
if (search_q[pqi].size() > search_q[1 - pqi].size()) {
pqi = 1 - pqi;
}
while (!search_q[pqi].empty()) {
search_q[1 - pqi].push(search_q[pqi].top());
search_q[pqi].pop();
}
pqi = 1 - pqi;
search_q[pqi].push(next_node);
*/
}
}
}
}
}
void Map::JPS() {
priority_queue <Node> JumpPoints;
Node startPoint(sx, sy, gx, gy, 0, -1, mode, 1);
startPoint.calculateDistanceToGoal();
startPoint.updatePriority();
JumpPoints.push(startPoint);
while (!JumpPoints.empty()) {
int x = JumpPoints.top().getX();
int y = JumpPoints.top().getY();
if (x == gx && y == gy) {
cout << "JPS find!!!" << endl;
double shortestLength = 0;
while (!(x == Map::getSx() && y == Map::getSy())) {
int fix_x = x;
int fix_y = y;
int tmp_dir = getDirectionData(fix_x, fix_y);
int px = Map::getParentMapData(fix_x, fix_y) % 512;
int py = Map::getParentMapData(fix_x, fix_y) / 512;
//while(!(Map::getOutMapData(x, y) == 'J')){
while (!(px == x && py == y)) {
if (Map::getParentMapData(fix_x, fix_y) == (y * Map::getWidth() + x)) break;
//if(!(Map::getOutMapData(x, y) == 'J')){
if (!(Map::getOutMapData(x, y) == 'J')) {
setOutMap(x, y, '#');
}
x -= DIR_X[tmp_dir];
y -= DIR_Y[tmp_dir];
if (tmp_dir % 2 == 1)
shortestLength += sqrt(2.0);
else
shortestLength += 1.0;
}
}
optimal_length = shortestLength;
cout << "Path Length = " << shortestLength << endl;
//cout<<"Path Length = "<< passedLength_c << endl;
return;
}
else
Map::identifySuccessors(JumpPoints);
}
while (!JumpPoints.empty()) {
JumpPoints.pop();
}
cout << "not found" << endl;
}
/*
Node Map::jump(Node const node, int dir, int& off) {
int nx = node.getX() + DIR_X[dir];
int ny = node.getY() + DIR_Y[dir];
if (nx > (w - 1) || nx < 0 || ny >(h - 1) || ny < 0) {
// Map::setOutMap(nx, ny, 'B');
Node NULL_node(-100, -100, 0, 0, 0, 0, "OCTILE", 1);
return NULL_node;
}
char n_MapData = Map::getOutMapData(nx, ny);
if (n_MapData == '#') {
Node NULL_node(-100, -100, 0, 0, 0, 0, "OCTILE", 1);
return NULL_node;
}
Node n_node(nx, ny, gx, gy, node.getPassedLength(), dir);
if (n_MapData == 'I')
Map::setOutMap(nx, ny, 'X');
if (nx == gx && ny == gy) {
off = 1;
return n_node;
}
int forced_neighbours_bits = Map::forced_neighbours(nx, ny, dir);
if (forced_neighbours_bits > 0) {
//Map::setOutMap(nx, ny, 'F');
return n_node;
}
if (dir % 2 == 1) {
// Algorithm 2 function jump 8th line)
if (Map::jump(n_node, (dir + 7) % 8, off).getX() != -100)
return n_node;
if (Map::jump(n_node, (dir + 1) % 8, off).getX() != -100)
return n_node;
}
if (n_MapData != 'S' && n_MapData != 'I' && n_MapData != 'G' && n_MapData != '#' && n_MapData != 'J')
Map::setOutMap(nx, ny, 'I');
//draw_map();
return Map::jump(n_node, dir, off);
}
*/
int Map::jump(int index, int dir, int& off) {
int x = index % w;
int y = index / w;
int nx = x + DIR_X[dir];
int ny = y + DIR_Y[dir];
int n_index = ny * w + nx;
if (nx > (w - 1) || nx < 0 || ny >(h - 1) || ny < 0) {
// Map::setOutMap(nx, ny, 'B');
return -1;
}
char n_MapData = Map::getOutMapData(nx, ny);
if (n_MapData == '#') {
return -1;
}
if (n_MapData == 'I')
Map::setOutMap(nx, ny, 'X');
if (nx == gx && ny == gy) {
off = 1;
return n_index;
}
int forced_neighbours_bits = Map::forced_neighbours(nx, ny, dir);
if (forced_neighbours_bits > 0) {
//Map::setOutMap(nx, ny, 'F');
return n_index;
}
if (dir % 2 == 1) {
// Algorithm 2 function jump 8th line)
if (Map::jump(n_index, (dir + 7) % 8, off) != -1)
return n_index;
if (Map::jump(n_index, (dir + 1) % 8, off) != -1)
return n_index;
}
if (n_MapData != 'S' && n_MapData != 'I' && n_MapData != 'G' && n_MapData != '#' && n_MapData != 'J')
Map::setOutMap(nx, ny, 'I');
//draw_map();
return Map::jump(n_index, dir, off);
}
void Map::identifySuccessors(priority_queue <Node>& successors) {
int x = successors.top().getX();
int y = successors.top().getY();
if (x == gx && y == gy)
return;
int index = y * Map::getWidth() + x;
int dir = successors.top().getDirection();
double passedLength = successors.top().getPassedLength();
Node start(x, y, gx, gy, passedLength, dir, mode, 1);
start.updatePassedLength();
start.calculateDistanceToGoal();
start.updatePriority();
successors.pop();
vector<int> candidate_dir;
if (dir == -1) {
for (int i = 0; i<8; i++) {
int dx = x + DIR_X[i];
int dy = y + DIR_Y[i];
if (!(dx < 0 || dx >(w - 1) || dy < 0 || dy >(h - 1) || Map::getOutMapData(dx, dy) == '#'))
candidate_dir.push_back(i);
}
}
else {
int bits = Map::forced_neighbours(x, y, dir);
for (int i = 0; i<8; i++) {
if (bits & (1 << i))
candidate_dir.push_back(i);
}
if (dir % 2 == 1) {
int dx = x + DIR_X[(dir + 1) % 8];
int dy = y + DIR_Y[(dir + 1) % 8];
if (!(dx < 0 || dx >(w - 1) || dy < 0 || dy >(h - 1) || Map::getOutMapData(dx, dy) == '#'))
candidate_dir.push_back((dir + 1) % 8);
dx = x + DIR_X[(dir + 7) % 8];
dy = y + DIR_Y[(dir + 7) % 8];
if (!(dx < 0 || dx >(w - 1) || dy < 0 || dy >(h - 1) || Map::getOutMapData(dx, dy) == '#'))
candidate_dir.push_back((dir + 7) % 8);
dx = x + DIR_X[dir];
dy = y + DIR_Y[dir];
if (!(dx < 0 || dx >(w - 1) || dy < 0 || dy >(h - 1) || Map::getOutMapData(dx, dy) == '#'))
candidate_dir.push_back(dir);
}
else {
int dx = x + DIR_X[dir];
int dy = y + DIR_Y[dir];
if (!(dx < 0 || dx >(w - 1) || dy < 0 || dy >(h - 1) || Map::getOutMapData(dx, dy) == '#'))
candidate_dir.push_back(dir);
}
}
for (int i = 0; i<candidate_dir.size(); i++) {
int nx, ny, n_index;
int n_dir = candidate_dir[i];
nx = x + DIR_X[n_dir];
ny = y + DIR_Y[n_dir];
int jx, jy;
double j_passedLength, s_dist = 0.0, d_dist = 0.0;
int off = 0;
int j_index = Map::jump(index, n_dir, off);
if (j_index == -1)
continue;
jx = j_index % w;
jy = j_index / w;
j_passedLength = passedLength + sqrt((x - jx)*(x - jx) + (y - jy)*(y - jy));
Node j_node(jx, jy, gx, gy, j_passedLength, n_dir, mode, 1);
j_node.setPassedLength(j_passedLength);
j_node.calculateDistanceToGoal();
j_node.updatePriority();
if (Map::getOpen_NodeData(jx, jy) == 0) {
Map::setOutMap(jx, jy, 'J');
Map::setParentMap(jx, jy, y * Map::getWidth() + x);
Map::setDirectionMap(jx, jy, n_dir);
Map::setOpen_NodeMap(jx, jy, j_node.getPriority());
successors.push(j_node);
}
else if (Map::getOpen_NodeData(jx, jy) > j_node.getPriority()) {
Map::setOpen_NodeMap(jx, jy, j_node.getPriority());
Map::setDirectionMap(jx, jy, n_dir);
Map::setParentMap(jx, jy, y * Map::getWidth() + x);
}
}
candidate_dir.clear();
return;
cout << "not found" << endl;
}
/*
*/
bool Map::is_obstacle(int x, int y, int dir) {
int nx = x + DIR_X[dir];
int ny = y + DIR_Y[dir];
if (nx < 0 || nx >(w - 1) || ny < 0 || ny >(h - 1))
return false;
if (Map::getInMapData(nx, ny) == '#')
return true;
else
return false;
}
int Map::forced_neighbours(int x, int y, int dir) {
int bits = 0;
if (dir == -1)
return -1;
if (dir % 2 == 0) {
// straight
int ndir1 = (dir + 2) % 8;
int ndir2 = (dir + 6) % 8;
if (Map::is_obstacle(x, y, ndir1)) {
if (!Map::is_obstacle(x, y, (dir + 1) % 8))
bits = bits | 1 << ((dir + 1) % 8);
}
if (Map::is_obstacle(x, y, ndir2))
if (!Map::is_obstacle(x, y, (dir + 7) % 8))
bits = bits | 1 << ((dir + 7) % 8);
}
else {
int ndir1 = (dir + 3) % 8;
int ndir2 = (dir + 5) % 8;
if (Map::is_obstacle(x, y, ndir1))
if (!Map::is_obstacle(x, y, (dir + 1) % 8))
bits = bits | 1 << ((dir + 1) % 8);
if (Map::is_obstacle(x, y, ndir2))
if (!Map::is_obstacle(x, y, (dir + 7) % 8))
bits = bits | 1 << ((dir + 7) % 8);
}
return bits;
}
Node::Node(int const x, int const y, int const gx, int const gy, double const passedLength, int const direction, string const mode, int const k) {
this->x = x; this->y = y;
this->direction = direction;
this->passedLength = passedLength;
this->gx = gx; this->gy = gy;
this->mode = mode;
if (k == 1) {
updatePassedLength();
calculateDistanceToGoal();
updatePriority();
}
}
Node::~Node() {}
int Node::getX() const { return x; }
int Node::getY() const { return y; }
int Node::getDirection() const { return direction; }
double Node::getPriority() const { return priority; }
double Node::getPassedLength() const { return passedLength; }
double Node::getDistanceToGoal() const { return distanceToGoal; }
void Node::calculateDistanceToGoal() {
double xd = abs(x - gx);
double yd = abs(y - gy);
if (mode.compare("MANHATTAN") == 0)
distanceToGoal = abs(xd) + abs(yd);
else if (mode.compare("EUCLIDIAN") == 0)
distanceToGoal = sqrt((xd*xd) + (yd*yd));
else if (mode.compare("CHEBYSHEV") == 0)
distanceToGoal = max(abs(xd), abs(yd));
else if (mode.compare("OCTILE") == 0)
distanceToGoal = max(xd, yd) + (sqrt(2.0) - 1) + min(xd, yd);
else
cout << "plz input mode" << endl;
}
void Node::updatePassedLength() {
if (direction == -1);
else if (direction % 2 == 1)
passedLength += (sqrt(2.0));
else
passedLength += (1);
}
void Node::updatePriority() {
priority = passedLength + distanceToGoal;
}
void Node::setPassedLength(double data) {
passedLength = data;
}
util.h
#include <iostream>
#include <string>
#include <queue>
#include <math.h>
#include <fstream>
#include <istream>
#include <cstdio>
#include <vector>
using namespace std;
class Node {
private:
int x;
int y;
int gx;
int gy;
int direction; // direction from past node
double passedLength;
double distanceToGoal;
double priority;
string mode; // mode -> "MANHATTAN", "EUCLIDIAN", CHEBYSHEV", "OCTILE"
public:
Node(int const x, int const y, int const gx, int const gy, double const passedLength, int const direction = 0, string const mode = "MANHATTAN" , int const k = 0 );
~Node();
int getX() const;
int getY() const;
int getDirection() const;
double getPriority() const;
double getPassedLength() const;
double getDistanceToGoal() const;
void setPassedLength(double data);
void calculateDistanceToGoal();
void updatePassedLength();
void updatePriority();
};
class Map {
public:
int gx, gy, sx, sy, w, h;
char* in_path, *out_path;
string mode;
char** in_map;
int** direction_map;
bool** visit_map;
char** out_map;
int** parent_map;
double** open_node_map;
double optimal_length;
public:
Map(int* START_GOAL, char* IN_PATH, char* OUT_PATH, string MODE);
~Map();
int getGx() const;
int getGy() const;
int getSx() const;
int getSy() const;
char getInMapData(int x, int y);
int getDirectionData(int x, int y);
bool getVisitMapData(int x, int y);
int getParentMapData(int x, int y);
double getOpen_NodeData(int x, int y);
int getHeight() const;
int getWidth() const;
double getOptimalLength() const;
char getOutMapData(int x, int y) const;
void setVisitMap(int x, int y, bool data);
void setDirectionMap(int x, int y, int data);
void setOutMap(int x, int y, char data);
void setParentMap(int x, int y, int data);
void setOpen_NodeMap(int x, int y, double data);
void initialize();
void draw_map();
void A_star();
void JPS();
//int identifySuccessors(int x, int y);
void identifySuccessors(priority_queue <Node>& successors);
//int jump(int node_index, int dir, double& s_distance, priority_queue <Node>& successors, double& d_distance, int& trig, int& fx_fy);
//Node Map::jump(Node node, int dir, int& off);
int Map::jump(int index, int dir, int& off);
bool is_obstacle(int x, int n, int dir);
int Map::forced_neighbours(int x, int y, int dir);
};
void read_scenario(char* path, char(*scenarios)[256], int& total);
Without an individual review of each line of code, it appears that the speed difference is due to your programming style.
To take just the first two examples: vector_clear is a poor reimplementation of std::vector::clear and operator<(Node a, Node b) makes unnecessary copies of both nodes. And glancing at the rest of the code, these do not appear to be exceptions.
Measuring the speed of a debug executable is pointless. The compiler settings used for debugging do not take speed of the resulting executable into account. And you've further complicated it by using a debug version of new. Only the speed in release mode is reasonable, and then only if you have good code to start with.

Trouble with a namespace in a header file, "undefined reference to" error?

I'm getting the following errors due to the namespace cpl?
I included Wavepacket.cpp and Vector.hpp below.
obj\Debug\wavepacket.o||In function `Z10initializev':|
wavepacket.cpp|79|undefined reference to `cpl::Vector::Vector(int)'|
wavepacket.cpp|79|undefined reference to `cpl::Vector::operator=(cpl::Vector const&)'|
wavepacket.cpp|80|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|80|undefined reference to `cpl::ComplexVector::operator=(cpl::ComplexVector const&)'|
wavepacket.cpp|81|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|81|undefined reference to `cpl::ComplexVector::operator=(cpl::ComplexVector const&)'|
wavepacket.cpp|101|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|101|undefined reference to `cpl::ComplexVector::operator=(cpl::ComplexVector const&)'|
wavepacket.cpp|102|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|102|undefined reference to `cpl::ComplexVector::operator=(cpl::ComplexVector const&)'|
wavepacket.cpp|103|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|103|undefined reference to `cpl::ComplexVector::operator=(cpl::ComplexVector const&)'|
obj\Debug\wavepacket.o||In function `Z8timeStepv':|
wavepacket.cpp|124|undefined reference to `cpl::solveTridiagonalCyclic(cpl::ComplexVector&, cpl::ComplexVector&, cpl::ComplexVector&, std::complex<double>, std::complex<double>, cpl::ComplexVector&, cpl::ComplexVector&)'|
wavepacket.cpp|126|undefined reference to `cpl::solveTridiagonal(cpl::ComplexVector&, cpl::ComplexVector&, cpl::ComplexVector&, cpl::ComplexVector&, cpl::ComplexVector&)'|
obj\Debug\wavepacket.o||In function `_static_initialization_and_destruction_0':|
wavepacket.cpp|22|undefined reference to `cpl::Vector::Vector(int)'|
wavepacket.cpp|71|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|71|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|72|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|72|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|72|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
||=== Build finished: 20 errors, 0 warnings ===|
Wavepacket.cpp
#include <cmath>
#include <complex>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <string>
#include <sstream>
#include <GL/gl.h>
#include <GL/glu.h>
#include <GL/glut.h>
#include "Vector.hpp"
const double pi = 4*std::atan(1.0);
double h_bar = 1; // natural units
double mass = 1; // natural units
// The spatial grid
int N = 200; // number of interior grid points
double L = 100; // system extends from x=0 to x=L
double h = L / (N + 1); // grid size
double tau = 1; // time step
cpl::Vector x; // coordinates of grid points
bool periodic = true; // false = oo potential, true = periodic
// The potential V(x)
double V0 = 1.0; // height of potential well
double Vwidth = 10; // width of potential well
double Vcenter = 0.75 * L; // center of potential well
bool gaussian; // false = step potential
double V(double x) {
double halfWidth = std::abs(0.5 * Vwidth);
if (gaussian) {
double dx = (x - Vcenter) / halfWidth;
return V0 * std::exp( - dx * dx / 2);
} else {
if (std::abs(x - Vcenter) <= halfWidth)
return V0;
else
return 0;
}
}
// Inital wave packet
double x0 = L / 4; // location of center
double E = 1; // average energy
double sigma0 = L / 10; // width of wave packet
double Norm_psi; // norm of psi
double k0; // average wavenumber
double velocity; // average velocity
void getInput() {
std::cout << "Time-dependent Schroedinger Equation\n";
std::cout << "Enter size of x region L = ";
std::cin >> L;
std::cout << "Enter number of grid points N = ";
std::cin >> N;
std::cout << "Enter integration time step tau = ";
std::cin >> tau;
std::cout << "Enter width of potential = ";
std::cin >> Vwidth;
std::cout << "Enter height of potential V0 = ";
std::cin >> V0;
std::cout << "Enter width of packet sigma = ";
std::cin >> sigma0;
std::cout << "Enter energy of packet E = ";
std::cin >> E;
}
double t; // time
cpl::ComplexVector psi, chi; // complex wavefunction
cpl::ComplexVector a, b, c; // to represent tridiagonal Q matrix
std::complex<double> alpha, beta; // corner elements of Q
void initialize () {
t = 0;
// reset vectors
x = cpl::Vector(N);
psi = cpl::ComplexVector(N);
chi = cpl::ComplexVector(N);
// reset the lattice
h = L / (N + 1);
for (int j = 0; j < N; j++)
x[j] = (j + 1) * h;
// inititalize the packet
k0 = std::sqrt(2*mass*E - h_bar*h_bar/2/sigma0/sigma0) / h_bar;
velocity = k0 / mass;
Norm_psi = 1 / std::sqrt(sigma0 * std::sqrt(pi));
for (int j = 0; j < N; j++) {
double expFactor = std::exp(-(x[j] - x0) * (x[j] - x0)
/ (2 * sigma0 * sigma0));
psi[j] = std::complex<double>(
Norm_psi * std::cos(k0 * x[j]) * expFactor,
Norm_psi * std::sin(k0 * x[j]) * expFactor);
}
// elements of tridiagonal matrix Q = (1/2)(1 + i tau H / (2 hbar))
a = cpl::ComplexVector(N);
b = cpl::ComplexVector(N);
c = cpl::ComplexVector(N);
for (int j = 0; j < N; j++) {
const std::complex<double> i(0.0, 1.0);
b[j] = 0.5 + i * tau / (4 * h_bar) *
(V(x[j]) + h_bar * h_bar / (mass * h * h));
a[j] = c[j] = - i * tau * h_bar / (8 * mass * h * h);
}
alpha = c[N-1];
beta = a[0];
}
double T = 5; // time to travel length L
double framesPerSec = 50; // animation rate for screen redraws
void timeStep() {
static std::clock_t clockStart;
static bool done;
if (!done) {
double t0 = t;
do {
if (periodic)
solveTridiagonalCyclic(a, b, c, alpha, beta, psi, chi);
else
solveTridiagonal(a, b, c, psi, chi);
for (int j = 0; j < N; j++)
psi[j] = chi[j] - psi[j];
t += tau;
} while (std::abs(velocity * (t - t0)) < L / T / framesPerSec);
done = true;
}
std::clock_t clockNow = std::clock();
double seconds = (clockNow - clockStart) / double(CLOCKS_PER_SEC);
if ( seconds < 1 / framesPerSec ) {
return;
} else {
clockStart = clockNow;
done = false;
}
glutPostRedisplay();
glFlush();
}
void drawText(const std::string& str, double x, double y) {
glRasterPos2d(x, y);
int len = str.find('\0');
for (int i = 0; i < len; i++)
glutBitmapCharacter(GLUT_BITMAP_HELVETICA_12, str[i]);
}
bool showRealImaginary; // false = probability only
void display() {
glClear(GL_COLOR_BUFFER_BIT);
if (showRealImaginary) {
glColor3f(0, 0, 1); // real part of psi blue
glBegin(GL_LINES);
for (int j = 1; j < N; j++) {
glVertex2d(x[j-1], psi[j-1].real());
glVertex2d(x[j], psi[j].real());
}
glEnd();
glColor3f(0, 1, 0); // imaginary part of psi green
glBegin(GL_LINES);
for (int j = 1; j < N; j++) {
glVertex2d(x[j-1], psi[j-1].imag());
glVertex2d(x[j], psi[j].imag());
}
glEnd();
}
glColor3f(1, 0, 0); // probability red
double pOld = psi[0].real() * psi[0].real() +
psi[0].imag() * psi[0].imag();
glBegin(GL_LINES);
for (int j = 1; j < N; j++) {
double p = psi[j].real() * psi[j].real() +
psi[j].imag() * psi[j].imag();
glVertex2d(x[j-1], 4 * pOld);
glVertex2d(x[j], 4 * p);
pOld = p;
}
glEnd();
glColor3ub(255, 165, 0); // potential orange
double Vold = V(x[1]);
glBegin(GL_LINES);
for (int j = 1; j < N; j++) {
double Vnew = V(x[j]);
glVertex2d(x[j-1], 0.2 * Vold);
glVertex2d(x[j], 0.2 * Vnew);
Vold = Vnew;
}
glEnd();
glColor3f(0, 0, 0); // text black
std::ostringstream os;
os << (periodic ? "Periodic " : "Infinite Wall ")
<< "Boundary Conditions" << std::ends;
drawText(os.str(), 0.02 * L, 0.28);
os.seekp(0); // beginning of string stream
os << "0" << std::ends;
drawText(os.str(), 0, -0.02);
drawText("0", 0, -0.02);
os.seekp(0);
os << "x = " << L << std::ends;
drawText(os.str(), (1 - 0.1) * L, -0.02);
os.seekp(0);
os << "t = " << t << std::ends;
drawText(os.str(), 0.02 * L, -0.29);
glutSwapBuffers();
}
void reshape(int w, int h) {
glViewport(0, 0, w, h);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluOrtho2D(0, L, -0.3, 0.3);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
}
bool running; // to control animation
void mouse(int button, int state, int x, int y) {
switch (button) {
case GLUT_LEFT_BUTTON:
if (state == GLUT_DOWN) {
if (running) {
glutIdleFunc(NULL);
running = false;
} else {
glutIdleFunc(timeStep);
running = true;
}
}
break;
default:
break;
}
}
void menu(int menuItem) {
switch (menuItem) {
case 1:
gaussian = !gaussian;
break;
case 2:
periodic = !periodic;
break;
case 3:
showRealImaginary = !showRealImaginary;
break;
case 4:
if (running) {
glutIdleFunc(NULL);
running = false;
}
initialize();
glutPostRedisplay();
break;
default:
break;
}
}
int main(int argc, char *argv[]) {
getInput();
initialize();
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB);
glutInitWindowSize(600, 400);
glutInitWindowPosition(100, 100);
glutCreateWindow("Schroedinger Wave Packet Motion");
glClearColor(1.0, 1.0, 1.0, 0.0);
glShadeModel(GL_FLAT);
glutDisplayFunc(display);
glutReshapeFunc(reshape);
glutMouseFunc(mouse);
glutCreateMenu(menu);
glutAddMenuEntry("Potential: Square/Gaussian", 1);
glutAddMenuEntry("Boundaries: Dirichlet/Periodic", 2);
glutAddMenuEntry("Real & Imag: Show/Hide", 3);
glutAddMenuEntry("Reset", 4);
glutAttachMenu(GLUT_RIGHT_BUTTON);
glutMainLoop();
}
Vector.hpp
#ifndef CPL_VECTOR_HPP
#define CPL_VECTOR_HPP
#include <complex>
#include <iostream>
namespace cpl {
class Vector {
public:
Vector(int dim = 1);
Vector(const Vector& dv);
~Vector() { delete [] v; }
int dimension() const { return dim; }
void resize(const int);
const double operator[](const int i) const { return v[i]; }
double& operator[](const int i) { return v[i]; }
Vector& operator = (const Vector& dv);
Vector& operator += (const Vector& dv);
Vector& operator -= (const Vector& dv);
Vector& operator *= (double d);
Vector& operator /= (double d);
double abs();
double norm();
double dot(const Vector& dv);
friend std::ostream& operator<<(std::ostream& os, const Vector& dv);
private:
int dim;
double *v;
};
inline Vector operator + (const Vector& dv) {
return dv;
}
extern Vector operator - (const Vector& dv);
extern Vector operator * (const Vector& dv, double d);
extern Vector operator * (double d, const Vector& dv);
extern Vector operator / (const Vector& dv, double d);
extern Vector operator + (const Vector& v1, const Vector& v2);
extern Vector operator - (const Vector& v1, const Vector& v2);
class ComplexVector {
public:
ComplexVector(int dim = 1);
ComplexVector(const ComplexVector& cv);
~ComplexVector() { delete [] v; }
int dimension() const { return dim; }
const std::complex<double> operator[](const int i) const { return v[i]; }
std::complex<double>& operator[](const int i) { return v[i]; }
ComplexVector& operator = (const ComplexVector& cv);
private:
int dim;
std::complex<double> *v;
};
class FFT {
public:
FFT() { N = 0; f = 0; inverse = false; }
void transform(ComplexVector& data);
void inverseTransform(ComplexVector& data);
Vector power(ComplexVector& data);
private:
int N;
ComplexVector *f;
bool inverse;
void bitReverse();
void DanielsonLanczos(int n);
};
extern void solveTridiagonal(
ComplexVector& a, ComplexVector& b, ComplexVector& c,
ComplexVector& r, ComplexVector& u);
extern void solveTridiagonalCyclic(
ComplexVector& a, ComplexVector& b, ComplexVector& c,
std::complex<double> alpha, std::complex<double> beta,
ComplexVector& r, ComplexVector& x);
} /* end namespace cpl */
#endif /* CPL_VECTOR_HPP */
EDIT I didn't want to delete this post incase someone needed it but I forgot to use Vector.cpp which is below.
#include "Vector.hpp"
namespace cpl {
Vector::Vector(int dim) {
v = new double [this->dim = dim];
for (int i = 0; i < dim; i++) v[i] = 0;
}
Vector::Vector(const Vector& dv) {
v = new double [dim = dv.dim];
for (int i = 0; i < dim; i++) v[i] = dv.v[i];
}
void Vector::resize(const int dimension) {
delete [] v;
v = new double [dim = dimension];
for (int i = 0; i < dim; i++) v[i] = 0;
}
Vector& Vector::operator = (const Vector& dv) {
if (this != &dv) {
if (dim != dv.dim) {
delete [] v;
v = new double [dim = dv.dim];
}
for (int i = 0; i < dim; i++) v[i] = dv[i];
}
return *this;
}
Vector& Vector::operator += (const Vector& dv) {
for (int i = 0; i < dim; i++) v[i] += dv[i];
return *this;
}
Vector& Vector::operator -= (const Vector& dv) {
for (int i = 0; i < dim; i++) v[i] -= dv[i];
return *this;
}
Vector& Vector::operator *= (double d) {
for (int i = 0; i < dim; i++) v[i] *= d;
return *this;
}
Vector& Vector::operator /= (double d) {
for (int i = 0; i < dim; i++) v[i] /= d;
return *this;
}
Vector operator - (const Vector& dv) {
int dim = dv.dimension();
Vector temp(dim);
for (int i = 0; i < dim; i++) temp[i] = -dv[i];
return temp;
}
Vector operator * (const Vector& dv, double d) {
int dim = dv.dimension();
Vector temp(dim);
for (int i = 0; i < dim; i++) temp[i] = dv[i] * d;
return temp;
}
Vector operator * (double d, const Vector& dv) {
int dim = dv.dimension();
Vector temp(dim);
for (int i = 0; i < dim; i++) temp[i] = dv[i] * d;
return temp;
}
Vector operator / (const Vector& dv, double d) {
int dim = dv.dimension();
Vector temp(dim);
for (int i = 0; i < dim; i++) temp[i] = dv[i] / d;
return temp;
}
Vector operator + (const Vector& v1, const Vector& v2) {
int dim = v1.dimension();
Vector temp(dim);
for (int i = 0; i < dim; i++) temp[i] = v1[i] + v2[i];
return temp;
}
Vector operator - (const Vector& v1, const Vector& v2) {
int dim = v1.dimension();
Vector temp(dim);
for (int i = 0; i < dim; i++) temp[i] = v1[i] - v2[i];
return temp;
}
double Vector::abs() {
return std::sqrt(norm());
}
double Vector::norm() {
double sum = 0;
for (int i = 0; i < dim; i++) sum += v[i] * v[i];
return sum;
}
double Vector::dot(const Vector& dv) {
double sum = 0;
for (int i = 0; i < dim; i++) sum += v[i] * dv[i];
return sum;
}
std::ostream& operator<<(std::ostream& os, const Vector& dv) {
for (int i = 0; i < dv.dim; i++) {
os << dv.v[i];
if (i < dv.dim-1)
os << '\t';
else
os << '\n';
}
return os;
}
// ComplexVector implementation
ComplexVector::ComplexVector(int dim) {
v = new std::complex<double> [this->dim = dim];
for (int i = 0; i < dim; i++) v[i] = 0.0;
}
ComplexVector::ComplexVector(const ComplexVector& cv) {
v = new std::complex<double> [dim = cv.dim];
for (int i = 0; i < dim; i++) v[i] = cv.v[i];
}
ComplexVector& ComplexVector::operator = (const ComplexVector& cv) {
if (this != &cv) {
if (dim != cv.dim) {
delete [] v;
v = new std::complex<double> [dim = cv.dim];
}
for (int i = 0; i < dim; i++) v[i] = cv[i];
}
return *this;
}
// FFT implementation
void FFT::transform(ComplexVector& data) {
N = data.dimension();
f = &data;
bitReverse();
for (int n = 1; n < N; n *= 2)
DanielsonLanczos(n);
for (int i = 0; i < N; ++i)
(*f)[i] /= std::sqrt(double(N));
}
void FFT::inverseTransform(ComplexVector& data) {
inverse = true;
transform(data);
inverse = false;
}
void FFT::bitReverse() {
int j = 1;
for (int i = 1; i < N; ++i) {
if (i < j) {
std::complex<double> temp = (*f)[i-1];
(*f)[i-1] = (*f)[j-1];
(*f)[j-1] = temp;
}
int k = N / 2;
while ( k < j ) {
j -= k;
k /= 2;
}
j += k;
}
}
void FFT::DanielsonLanczos(int n) {
const double pi = 4 * atan(1.0);
std::complex<double> W(0, pi / n);
W = inverse ? std::exp(-W) : std::exp(W);
std::complex<double> W_j(1, 0);
for (int j = 0; j < n; ++j) {
for (int i = j; i < N; i += 2 * n) {
std::complex<double> temp = W_j * (*f)[n+i];
(*f)[n+i] = (*f)[i] - temp;
(*f)[i] += temp;
}
W_j *= W;
}
}
Vector FFT::power(ComplexVector& data) {
Vector P(1 + N / 2);
P[0] = std::norm(data[0]) / double(N);
for (int i = 1; i < N / 2; i++)
P[i] = (std::norm(data[i]) + std::norm(data[N-i])) / double(N);
P[N/2] = std::norm(data[N/2]) / double(N);
return P;
}
// Solving tridiagonal complex matrices
void solveTridiagonal(
ComplexVector& a, ComplexVector& b, ComplexVector& c,
ComplexVector& r, ComplexVector& u)
{
int n = a.dimension();
ComplexVector gamma(n);
std::complex<double> beta = b[0];
u[0] = r[0] / beta;
for (int j = 1; j < n; j++) {
gamma[j] = c[j-1] / beta;
beta = b[j] - a[j] * gamma[j];
u[j] = (r[j] - a[j] * u[j-1]) / beta;
}
for (int j = n - 2; j >= 0; j--)
u[j] -= gamma[j+1] * u[j+1];
}
void solveTridiagonalCyclic(
ComplexVector& a, ComplexVector& b, ComplexVector& c,
std::complex<double> alpha, std::complex<double> beta,
ComplexVector& r, ComplexVector& x)
{
int n = a.dimension();
ComplexVector bb(n), u(n), z(n);
std::complex<double> gamma = -b[0];
bb[0] = b[0] - gamma;
bb[n-1] = b[n-1] - alpha * beta / gamma;
for (int i = 1; i < n-1; i++)
bb[i] = b[i];
solveTridiagonal(a, bb, c, r, x);
u[0] = gamma;
u[n-1] = alpha;
for (int i = 1; i < n-1; i++)
u[i] = 0.0;
solveTridiagonal(a, bb, c, u, z);
std::complex<double> fact = x[0] + beta * x[n-1] / gamma;
fact /= 1.0 + z[0] + beta * z[n-1] / gamma;
for (int i = 0; i < n; i++)
x[i] -= fact * z[i];
}
} /* end namespace cpl */
it's probably your build script that's not configured correctly. Your code compiled for me when I used the following commands:
g++ -c Vector.cpp -o Vector.o
g++ -c Wavepacket.cpp -o Wavepacket.o
g++ Vector.o Wavepacket.o -lGL -lGLU -lglut -o app