I'm trying to create a class that can procedurally create prisms (or cylinders if the precision is high enough) but only the sides of the 3d model are showing (not the top and bottom). This is using openGL and c++. Not going for efficiency, just modifying a previous class that made a sphere.
#define numSlices 2
Prism::Prism() {
init(3);
}
Prism::Prism(int prec) {
init(prec);
}
float Prism::toRadians(float degrees) { return (degrees * 2.0f * 3.14159f) / 360.0f; }
void Prism::init(int prec) {
prec = (prec < 3) ? 3 : prec;
numVertices = (prec + 1) * (numSlices+1);
numIndices = prec * numSlices * 6;
for (int i = 0; i < numVertices; i++) { vertices.push_back(glm::vec3()); }
for (int i = 0; i < numVertices; i++) { texCoords.push_back(glm::vec2()); }
for (int i = 0; i < numVertices; i++) { normals.push_back(glm::vec3()); }
for (int i = 0; i < numVertices; i++) { tangents.push_back(glm::vec3()); }
for (int i = 0; i < numIndices; i++) { indices.push_back(0); }
// calculate triangle vertices
for (int i = 0; i <= numSlices; i++) {
for (int j = 0; j <= prec; j++) {
float y = i;
float x = -(float)cos(toRadians(j * 360.0f / (float)prec));
float z = (float)sin(toRadians(j * 360.0f / (float)prec));
vertices[i * (prec + 1) + j] = glm::vec3(x, y, z);
texCoords[i * (prec + 1) + j] = glm::vec2(((float)j / prec), ((float)i / numSlices));
}
}
// calculate triangle indices
for (int i = 0; i < numSlices; i++) {
for (int j = 0; j < prec; j++) {
indices[6 * (i * prec + j) + 0] = i * (prec + 1) + j;
indices[6 * (i * prec + j) + 1] = i * (prec + 1) + j + 1;
indices[6 * (i * prec + j) + 2] = (i + 1) * (prec + 1) + j;
indices[6 * (i * prec + j) + 3] = i * (prec + 1) + j + 1;
indices[6 * (i * prec + j) + 4] = (i + 1) * (prec + 1) + j + 1;
indices[6 * (i * prec + j) + 5] = (i + 1) * (prec + 1) + j;
}
}
}
Any tips or solutions that stick closely to the code already written would much appreciated.
To render the top and bottom of the cylinder, you can create a "triangle fan" that starts from a vertex at the center of the top/bottom of the cylinder and creates one triangle for every side.
Adapting your code: (untested, I may have made mistakes against winding order)
int bottom_center = vertices.length(); vertices.push_back(glm::vec3(0,0,0));
int top_center = vertices.length(); vertices.push_back(glm::vec3(0,numSlices,0));
// Bottom
for (int j = 0; j < prec; j++) {
int base = 0;
indices.push_back(bottom_center);
indices.push_back(base+j);
indices.push_back(base+j+1);
}
// Top
for (int j = 0; j < prec; j++) {
int base = numSlices * (prec+1);
indices.push_back(top_center);
indices.push_back(base+j);
indices.push_back(base+j+1);
}
See http://www.songho.ca/opengl/gl_cylinder.html for a more worked-out example.
Related
I have a working sequential Crout Decomposition algorithm that I need to speed up if possible. I have looked online at various OpenMP methods of parallelising the algorithm and I can only get it to work correctly on the lower triangular matrix part of the code. The upper yields wrong results
I feel like I have been looking at the code too long and I may be blind to a data dependency that I am overlooking
Sequential code is as follows, which works correct
for (i = 0; i < size; i++)
{
// Upper Triangle
for (j = 0; j < i; j++)
{
q = matx[j * size + i];
for (k = 0; k < j; k++)
{
q -= matx[j * size + k] * matx[k * size + i];
}
matx[j * size + i] = q;
}
// Lower Triangle
for (j = i; j < size; j++)
{
q = matx[j * size + i];
for (k = 0; k < i; k++)
{
q -= matx[j * size + k] * matx[k * size + i];
}
matx[j * size + i] = q;
}
}
Now here is the code with the appropriate OpenMP directives
for (i = 0; i < size; i++)
{
// Upper Triangle
#pragma omp parallel for private(j,k,q)
for (j = 0; j < i; j++)
{
q = matx[j * size + i];
for (k = 0; k < j; k++)
{
q -= matx[j * size + k] * matx[k * size + i];
}
matx[j * size + i] = q;
}
// Lower Triangle
#pragma omp parallel for private(j,k,q)
for (j = i; j < size; j++)
{
q = matx[j * size + i];
for (k = 0; k < i; k++)
{
q -= matx[j * size + k] * matx[k * size + i];
}
matx[j * size + i] = q;
}
}
If only the lower triangle is in parallel I yield the correct decompostion, however the upper throws out discrepancies
Many thanks for any help with this
I'm trying to rewrite the main loop in a physics simulation and split the workload between more threads.
It calls dostuff on every unique pair of indices and looks like this:
for (int i = 0; i < n - 1; ++i)
{
for (int j = i + 1; j < n; ++j)
{
dostuff(i, j);
}
}
I came up with two options:
//#1
//sqrt is implemented as binary search on ints, floors the result
for (int x = 0; x < n * (n - 1) / 2; ++x)
{
int i = (1 + sqrt(1 + 8 * x)) / 2;
int j = x - i * (i - 1) / 2;
dostuff(i, j);
}
//#2
for (int x = 0; x < n * n; ++x)
{
int i = x % n;
int j = x / n;
if (i < j)
dostuff(i, j);
}
And for each option, there is corresponding thread loop using shared atomic counter:
//#1
while(int x = counter.fetch_add(1) < n * (n - 1) / 2)
{
int i = (1 + sqrt(1 + 8 * x)) / 2;
int j = x - i * (i - 1) / 2;
dostuff(i, j);
}
//#2
while(int x = counter.fetch_add(1) < n * n)
{
int i = x % n;
int j = x / n;
if (i < j)
dostuff(i, j);
}
My question is, what is the best way to share the workload of the main loop between threads for n < 10^6?
EDIT:
//dostuff
Element& a = elements[i];
Element& b = elements[j];
glm::dvec3 r = b.getPosition() - a.getPosition();
double rv = glm::length(r);
double base = G / (rv * rv);
glm::dvec3 dir = glm::normalize(r);
glm::dvec3 bd = dir * base;
accelerations[i] += bd * b.getMass();
accelerations[j] -= bd * a.getMass();
Your work is a triangle. You want to.divide the triangle into k distinct pieces.
If k is a power of 2 you can do this:
a
a a
b c d
b c d d
Each of those regions are equal in size.
I am working on creating my own implementation of a separable sobel filter implementation. My function has as input the kernelSize, the horizontal filter of gradient Y as pixelsY1, the vertical filter of gradient Y as pixelsY2, the horizontal filter of gradient X as pixelsX1, the vertical filter of gradient X as pixelsX2.
The input of X1 is [1, 0, -1] (horizontal)
The input of X2 is [1, 2, 1] (vertical)
The input of Y1 is [1, 2, 1] (horizontal)
The input of Y2 is [1, 0 -1] (vertical)
void gradientFilter1D(Mat& img, int kernelSize, vector<double> pixelsY1, vector<double> pixelsY2, vector<double> pixelsX1, vector<double> pixelsX2)
{
int sumMin = INT_MAX, sumMax = INT_MIN;
//gradient X
vector<vector<int>> pixelsX(img.rows, vector<int>(img.cols, 0));
//gradient Y
vector<vector<int>> pixelsY(img.rows, vector<int>(img.cols, 0));
vector<vector<int>> sumArray(img.rows, vector<int>(img.cols, 0));
for (int j = kernelSize / 2; j < img.rows - kernelSize / 2; j++)
{
for (int i = kernelSize / 2; i < img.cols - kernelSize / 2; i++)
{
double totalX = 0;
double totalY = 0;
//this is the horizontal multiplication
for (int x = -kernelSize / 2; x <= kernelSize / 2; x++)
{
totalY += img.at<uchar>(j, i + x) * pixelsY1[x + (kernelSize / 2)];
totalX += img.at<uchar>(j, i + x) * pixelsX1[x + (kernelSize / 2)];
//cout << int(img.at<uchar>(j, i + x)) << " " << pixelsY1[x + (kernelSize / 2)] << endl;
}
pixelsX[j][i] = totalX;
pixelsY[j][i] = totalY;
}
}
for (int j = kernelSize / 2; j < img.rows - kernelSize / 2; j++)
{
for (int i = kernelSize / 2; i < img.cols - kernelSize / 2; i++)
{
double totalX = 0;
double totalY = 0;
//this is the vertical multiplication
for (int x = -kernelSize / 2; x <= kernelSize / 2; x++)
{
totalY += pixelsY[j + x][i] * pixelsY2[x + (kernelSize / 2)];
totalX += pixelsX[j + x][i] * pixelsX2[x + (kernelSize / 2)];
//cout << int(img.at<uchar>(j, i + x)) << " " << pixelsY1[x + (kernelSize / 2)] << endl;
}
pixelsX[j][i] = totalX;
pixelsY[j][i] = totalY;
}
}
for (int j = 0; j < img.rows; j++)
{
for (int i = 0; i < img.cols; i++)
{
int sum;
sum = sqrt(pow(pixelsX[j][i], 2) + pow(pixelsY[j][i], 2));
sumArray[j][i] = sum;
sumMin = sumMin < sum ? sumMin : sum;
sumMax = sumMax > sum ? sumMax : sum;
}
}
//normalization
for (int j = 0; j < img.rows; j++)
for (int i = 0; i < img.cols; i++)
{
sumArray[j][i] = (sumArray[j][i] - sumMin) * ((255.0 - 0) / (sumMax - sumMin)) + 0;
img.at<uchar>(j, i) = sumArray[j][i];
}
}
Input Image:
Output Image:
What am I doing wrong?
The separable filter is computed in what are effectively two passes. (The passes can be interleaved, but all the values used by the vertical filter have to have already been computed by the horizontal filter if doing it in that order.) Right below the comment //then here I do the vertical multiplication there are accesses to pixelsX and pixelsY that are effectively a second pass of the separable filter. The values being accessed for negative values of x have been previously computed and the ones for positive values of x have not yet been computed by the horizontal pass.
check out Halide. It makes this sort of code a lot easier and more performant. (A double nesting of std::vector is not a good way to go.)
Okay, so my mistake was actually in this
for (int j = kernelSize / 2; j < img.rows - kernelSize / 2; j++)
{
for (int i = kernelSize / 2; i < img.cols - kernelSize / 2; i++)
{
double totalX = 0;
double totalY = 0;
//this is the vertical multiplication
for (int x = -kernelSize / 2; x <= kernelSize / 2; x++)
{
totalY += pixelsY[j + x][i] * pixelsY2[x + (kernelSize / 2)];
totalX += pixelsX[j + x][i] * pixelsX2[x + (kernelSize / 2)];
//cout << int(img.at<uchar>(j, i + x)) << " " << pixelsY1[x + (kernelSize / 2)] << endl;
}
pixelsX[j][i] = totalX; <---- I overwrite the old values
pixelsY[j][i] = totalY; <--- I overwrite the old values
}
}
So, pixelsX[j][i] = totalX and so forth is wrong, because I need the old values in order to finish the computation in the rest of the j, and i loops. So, I created another vector of vectors and pushed in it the totalX's and Y's, and this solved my issue.
block_sparrse_matrix.h
#include "ldl_decomposition.h"
SMVS_NAMESPACE_BEGIN
template<int N>
void
BlockSparseMatrix<N>::invert_blocks_inplace(void)
{
for (std::size_t i = 0; i < this->values.size(); ++i)
{
std::array<double, N * N> b = values[i];
ldl_inverse(b.begin(), N);
bool nancheck = false;
for (int i = 0; i < N * N; ++i)
if (std::isnan(b[i]))
nancheck = true;
if(nancheck)
continue;
values[i] = b;
}
}
ldl_decomposition.h
SMVS_NAMESPACE_BEGIN
template<typename T>
void
ldl_inverse(T * A, int const size)
{
T * L = new T[size * size];
T * D = new T[size];
std::fill(L, L + size * size, 0.0);
std::fill(D, D + size, 0.0);
/* Factorize A into LDL^T */
for (int j = 0; j < size; ++j)
{
D[j] = A[j * size + j];
L[j * size + j] = 1.0;
for (int k = 0 ; k < j; ++k)
D[j] -= (L[j * size + k] * L[j * size + k]) * D[k];
if (D[j] == 0.0)
return;
for (int i = j+1; i < size; ++i)
{
L[i * size + j] = A[i * size + j];
for (int k = 0 ; k < j; ++k)
L[i * size + j] -= L[i * size + k] * D[k] * L[j * size + k];
L[i * size + j] /= D[j];
}
}
/* Invert L */
for (int i = 0; i < size; ++i)
for (int j = i+1; j < size; ++j)
{
T sum(0);
for (int k = i ; k < j; ++k)
sum -= L[j * size + k] * L[k * size + i];
L[j * size + i] = sum;
}
/* Invert D */
for (int i = 0; i < size; ++i)
D[i] = 1.0 / D[i];
/* Combine Matrices */
combine_ldl(L, D, size, A);
/* Cleanup memory */
delete[] L;
delete[] D;
}
I want to make it work on Windows, but it is error.
Error C2672 'ldl_inverse': no matching overloaded function found
(compiling source file E:\mve\libs\smvsrecon\gauss_newton_step.cc)
Error C2784 'void smvs::ldl_inverse(T *,const int)': could not deduce
template argument for 'T *' from 'std::_Array_iterator<_Ty,16>'
I originally had 3 equations: Pu, Pm & Pd. It ran fine.
Once I introduced the if statement, with variations on the 3 equations, depending on the loop iteration, I receive a runtime error.
Any help would be appreciated.
Cheers in advance.
#include <cmath>
#include <iostream>
#include <vector>
#include <iomanip>
int Rounding(double x)
{
int Integer = (int)x;
double Decimal = x - Integer;
if (Decimal > 0.49)
{
return (Integer + 1);
}
else
{
return Integer;
}
}
int main()
{
double a = 0.1;
double sigma = 0.01;
int delta_t = 1;
double M = -a * delta_t;
double V = sigma * sigma * delta_t;
double delta_r = sqrt(3 * V);
int count;
double PuValue;
double PmValue;
double PdValue;
int j_max;
int j_min;
j_max = Rounding(-0.184 / M);
j_min = -j_max;
std::vector<std::vector<double>> Pu((20), std::vector<double>(20));
std::vector<std::vector<double>> Pm((20), std::vector<double>(20));
std::vector<std::vector<double>> Pd((20), std::vector<double>(20));
std::cout << std::setprecision(10);
for (int i = 0; i <= 2; i++)
{
count = 0;
for (int j = i; j >= -i; j--)
{
count = count + 1;
if (j = j_max) // Exhibit 1C
{
PuValue = 7.0/6.0 + (j * j * M * M + 3 * j * M)/2.0;
PmValue = -1.0/3.0 - j * j * M * M - 2 * j * M;
PdValue = 1.0/6.0 + (j * j * M * M + j * M)/2.0;
}
else if (j = j_min) // Exhibit 1B
{
PuValue = 1.0/6.0 + (j * j * M * M - j * M)/2.0;
PmValue = -1.0/3.0 - j * j * M * M + 2 * j * M;
PdValue = 7.0/6.0 + (j * j * M * M - 3 * j * M)/2.0;
}
else
{
PuValue = 1.0/6.0 + (j * j * M * M + j * M)/2.0;
PmValue = 2.0/3.0 - j * j * M * M;
PdValue = 1.0/6.0 + (j * j * M * M - j * M)/2.0;
}
Pu[count][i] = PuValue;
Pm[count][i] = PmValue;
Pd[count][i] = PdValue;
std::cout << Pu[count][i] << ", ";
}
std::cout << std::endl;
}
return 0;
}
You are assigning instead of checking for equal: j_max to j in your if statements.
if (j = j_max)
// ^
else if (j = j_min)
// ^
Change if (j = j_max) to if (j == j_max),
And else if (j = j_min) to else if (j == j_min).
Correct the following if conditional check and all other instances of an if check
if(j=j_max)
with
if (j == j_max)
you are checking for an equality not assigning.
Your code was going into an infinite loop.