DirectXMath vector operations precision - c++

I'm having strange results of XMVector3AngleBetweenVectors function. Consider this code:
float angle = XMConvertToDegrees(XMVectorGetX(
XMVector3AngleBetweenVectors(GMathFV(XMFLOAT3(0.0f, 100.0f, 0.0f)),
GMathFV(XMFLOAT3(0.0f, 200.0f, 0.0f)))));
It's looking for angle between two 3D vectors, described by XMFLOAT3 structures. GMathFV is user-defined function which converts XMFLOAT3 to XMVECTOR as follows:
inline XMVECTOR GMathFV(XMFLOAT3& val)
{
return XMLoadFloat3(&val);
}
Everything else is directxmath.h library. Here everything is fine and result angle is 0.00000 just as expected.
But for other vectors with negative y-axis value, for example:
float angle = XMConvertToDegrees(XMVectorGetX(
XMVector3AngleBetweenVectors(GMathFV(XMFLOAT3(0.0f, -100.0f, 0.0f)),
GMathFV(XMFLOAT3(0.0f, -99.0f, 0.0f)))));
Result is 0.0197823402, which I can hardly call a zero angle.
Please someone help me figure out the problem. Is it negative number precision, too close vector coordinates or maybe something else?
UPD: Amazing, but it gives 0.0197823402 for a(0.0f, 100.0f, 0.0f) x b(0.0f, 99.0f, 0.0f), but 0.000000 for a(0.0f, 101.0f, 0.0f) x b(0.0f, 100.0f, 0.0f)

DirectXMath is designed for 32bit floating point math. You're seeing floating point error escalation. Here's the definition of XMVector3AngleBetweenVectors.
inline XMVECTOR XM_CALLCONV XMVector3AngleBetweenVectors(FXMVECTOR V1, FXMVECTOR V2)
{
XMVECTOR L1 = XMVector3ReciprocalLength(V1);
XMVECTOR L2 = XMVector3ReciprocalLength(V2);
XMVECTOR Dot = XMVector3Dot(V1, V2);
L1 = XMVectorMultiply(L1, L2);
XMVECTOR CosAngle = XMVectorMultiply(Dot, L1);
CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne.v, g_XMOne.v);
return XMVectorACos(CosAngle);
}
In your first example CosAngle equals 1.000000000
In your second example CosAngle equals 0.999999940
XMVectorACos(0.999999940) = 0.000345266977
This large error comes from a polynomial approximation of ACos. In general you should avoid trigonometric inverses whenever possible. They are slow and noisy. Here's the definition so you can get an idea of its size.
inline XMVECTOR XM_CALLCONV XMVectorACos (FXMVECTOR V)
{
__m128 nonnegative = _mm_cmpge_ps(V, g_XMZero);
__m128 mvalue = _mm_sub_ps(g_XMZero, V);
__m128 x = _mm_max_ps(V, mvalue); // |V|
// Compute (1-|V|), clamp to zero to avoid sqrt of negative number.
__m128 oneMValue = _mm_sub_ps(g_XMOne, x);
__m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue);
__m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|V|)
// Compute polynomial approximation
const XMVECTOR AC1 = g_XMArcCoefficients1;
XMVECTOR vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(3, 3, 3, 3) );
__m128 t0 = _mm_mul_ps(vConstants, x);
vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(2, 2, 2, 2) );
t0 = _mm_add_ps(t0, vConstants);
t0 = _mm_mul_ps(t0, x);
vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(1, 1, 1, 1) );
t0 = _mm_add_ps(t0, vConstants);
t0 = _mm_mul_ps(t0, x);
vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(0, 0, 0, 0) );
t0 = _mm_add_ps(t0, vConstants);
t0 = _mm_mul_ps(t0, x);
const XMVECTOR AC0 = g_XMArcCoefficients0;
vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(3, 3, 3, 3) );
t0 = _mm_add_ps(t0, vConstants);
t0 = _mm_mul_ps(t0, x);
vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(2, 2, 2, 2) );
t0 = _mm_add_ps(t0, vConstants);
t0 = _mm_mul_ps(t0, x);
vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(1, 1, 1, 1) );
t0 = _mm_add_ps(t0, vConstants);
t0 = _mm_mul_ps(t0, x);
vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(0, 0, 0, 0) );
t0 = _mm_add_ps(t0, vConstants);
t0 = _mm_mul_ps(t0, root);
__m128 t1 = _mm_sub_ps(g_XMPi, t0);
t0 = _mm_and_ps(nonnegative, t0);
t1 = _mm_andnot_ps(nonnegative, t1);
t0 = _mm_or_ps(t0, t1);
return t0;
}

Related

GLM plane to plane trasformation

I am trying to use plane to plane transformation method in glm.
But rotations are wrong, I use this code in Eigen, also in CGAL libraries. Everything was working correctly. But something off is in GLM.
Can someone tell me why this orientation code produces wrong results?
inline glm::mat4 plane_to_plane(
const std::array<glm::vec3, 4>& plane_0,
const std::array<glm::vec3, 4>& plane_1
//IK::Vector_3 O1, IK::Vector_3 X1, IK::Vector_3 Y1, IK::Vector_3 Z1
) {
// transformation maps P0 to P1, P0+X0 to P1+X1, ...
//Move to origin -> T0 translates point P0 to (0,0,0)
glm::mat4 T0 = glm::translate(glm::vec3(0 - plane_0[0].x, 0 - plane_0[0].y, 0 - plane_0[0].z));
//Rotate ->
glm::mat3 F0(
plane_0[1].x, plane_0[1].y, plane_0[1].z,
plane_0[2].x, plane_0[2].y, plane_0[2].z,
plane_0[3].x, plane_0[3].y, plane_0[3].z
);
glm::mat3 F1(
plane_1[1].x, plane_1[2].x, plane_1[3].x,
plane_1[1].y, plane_1[2].y, plane_1[3].y,
plane_1[1].z, plane_1[2].z, plane_1[3].z
);
glm::mat3 R = F1 * F0;
glm::mat4 R_ = glm::mat4(R);
//Move to 3d -> T1 translates (0,0,0) to point P1
glm::mat4 T1 = glm::translate(glm::vec3(plane_1[0].x - 0, plane_1[0].y - 0, plane_1[0].z - 0));
return T1 * R_ * T0;
}
I use two planes for the transformation:
std::vector<float> plines_v = {
-0.5, -0.5, 0, 0.5, -0.5, 0, 0.5, 0, 0, 0.307011, 0.5, 0, 0.307011, 0.5, 0.136471, 0.5, 0, 0.136471, 0.5, -0.5, 0.136471, -0.5, -0.5, 0.136471, -0.5, 0.5, 0.136471, -0.5, 0.5, 0, -0.5, -0.5, 0
};
auto plane_0 = std::array<glm::vec3, 4>{
glm::vec3(-0.5, -0.5, 0),
glm::vec3(1, 0, 0),
glm::vec3(0, 1, 0),
glm::vec3(0, 0, 1),
};
auto plane_1 = std::array<glm::vec3, 4>{
glm::vec3(-0.5, -0.5, 0),
glm::vec3(0.912851, 0.035424, -0.406752),
glm::vec3(0.225204, -0.874665, 0.429238),
glm::vec3(-0.340566, -0.483432, -0.806417),
};
auto xform = opengl_transform::plane_to_plane(plane_0, plane_1);
for (int i = 0; i < plines_v.size(); i += 3) {
glm::vec4 v(plines_v[i + 0], plines_v[i + 1], plines_v[i + 2], 1);
v = xform*v;
plines_v[i + 0] = v.x;
plines_v[i + 1] = v.y;
plines_v[i + 2] = v.z;
}
Long story short, I tried to inverse R matrix with the following code that seems to work.
Any ideas how to write F0 and F1 matrices without inverting them afterwards?
inline glm::mat4 plane_to_plane(
const std::array<glm::vec3, 4>& plane_0,
const std::array<glm::vec3, 4>& plane_1
//IK::Vector_3 O1, IK::Vector_3 X1, IK::Vector_3 Y1, IK::Vector_3 Z1
) {
// transformation maps P0 to P1, P0+X0 to P1+X1, ...
//Move to origin -> T0 translates point P0 to (0,0,0)
glm::mat4 T0 = glm::translate(glm::vec3(0 - plane_0[0].x, 0 - plane_0[0].y, 0 - plane_0[0].z));
//Rotate ->
glm::mat3 F0(
plane_0[1].x, plane_0[1].y, plane_0[1].z,
plane_0[2].x, plane_0[2].y, plane_0[2].z,
plane_0[3].x, plane_0[3].y, plane_0[3].z
);
glm::mat3 F1(
plane_1[1].x, plane_1[2].x, plane_1[3].x,
plane_1[1].y, plane_1[2].y, plane_1[3].y,
plane_1[1].z, plane_1[2].z, plane_1[3].z
);
glm::mat3 R = F1 * F0;
glm::mat3 R_inv(1.0);
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
R_inv[j][i] = R[i][j];
}
}
glm::mat4 R_ = glm::mat4(R_inv);
//Move to 3d -> T1 translates (0,0,0) to point P1
glm::mat4 T1 = glm::translate(glm::vec3(plane_1[0].x, plane_1[0].y, plane_1[0].z));
return T1 * R_ * T0;
}

Raytracing program fails to detect intersections in c++

I am working on a simple raytracer in c++. I am currently implementing an intersection function but have encountered some issues.
For some reason, the collision detection only works for a tiny rectangle in my image. In the image below you can see that it draws the room quite fine for a small part of the screen but fails to do so for the rest of the scene. Only a small section gets drawn correctly.
Why does my intersection detection not work? I have included the code for the intersection and draw function below.
LoadTestModel(m_Model);
m_Light.position = glm::vec3(0.0f, -1.0f, 0.0);
m_Light.color = glm::vec3(0.f, 0.f, 0.f);
m_Light.ambient = glm::vec3(0.5f, 0.5f, 0.5f);
m_Camera.position = glm::vec3(0.0, 0.0, -2.0);
m_Camera.yaw = 0.0f;
}
void Lab2Scene::Draw(Window& window)
{
if (!m_RenderNext) return;
m_RenderNext = false;
for (uint32_t y = 0; y < window.GetHeight(); ++y)
{
for (uint32_t x = 0; x < window.GetWidth(); ++x)
{
Ray ray = {};
glm::vec3 d(x - (window.GetWidth() / 2), y - (window.GetHeight() / 2), (window.GetHeight() / 2));
d = glm::normalize(d);
ray.direction = d * m_Camera.GetRotationY();
ray.start = m_Camera.position;
// Find the closest intersection of the casted ray.
Intersection nearest_intersection = {};
if (ClosestIntersection(ray, m_Model, nearest_intersection))
{
//window.PutPixel(x, y, glm::vec3(1.f, 0.f, 0.f));
window.PutPixel(x, y, DirectLight(m_Light, nearest_intersection, m_Model) + m_Model[nearest_intersection.triangleIndex].color * m_Light.ambient); // DirectLight(m_Light, intersection, m_Model)
}
else
{
window.PutPixel(x, y, m_Light.color);
}
}
}
}
bool Lab2Scene::ClosestIntersection(const Ray& ray, const std::vector<Triangle>& triangles, Intersection& intersection)
{
float m = std::numeric_limits<float>::max();
intersection.distance = m;
bool inters = false;
for (int i = 0; i < triangles.size(); ++i) {
float dot = glm::dot(ray.direction, triangles[i].normal);
if (dot != 0) {
using glm::vec3;
using glm::mat3;
vec3 v0 = triangles[i].v0;
vec3 v1 = triangles[i].v1;
vec3 v2 = triangles[i].v2;
vec3 e1 = v1 - v0;
vec3 e2 = v2 - v0;
vec3 b = ray.start - v0;
mat3 A(-ray.direction, e1, e2);
vec3 x = glm::inverse(A) * b;
if (x[1] >= 0 && x[2] >= 0 && x[1] + x[2] <= 1 && x[0] >= 0) {
vec3 intersect = ray.start + (x[0] * ray.direction);
if (glm::distance(ray.start, intersect) <= intersection.distance) {
intersection.position = intersect;
intersection.distance = glm::distance(ray.start, intersect);
intersection.triangleIndex = i;
inters = true;
}
}
}
}
return inters;
}

OpenGL glGetUniformBlockIndex returns INVALID_INDEX on nvidea gpus's

I've been working on this opengl game engine for quite a while now, and I'm making a game in it as well with procedurally generated planets. However, I got some weird problems that I've been stuck on for 3 weeks now.
To generate the planets on the fly, I make use of a material consisting of a tesselation and geometry shader to do the generating in. This way, it's super fast. Now in the tessellation evaluation shader, I use a uniform buffer block to send the parameters for the planet generation to the gpu (which is I believe 48 bytes x number of noise layers).
Now all this works fine on my desktop PC which has a radeon r9 390, and also on my laptop which has a gtx 1060. However, on the following desktop gpu's i get an INVALID_INDEX when calling glGetUniformBlockIndex and obviously it doesn't work as it is trying to generate with incorrect parameters.
- gtx 1050
- gtx 1060
- gtx 960
- gtx 970
- rtx 2080
On the following gpu's everything is working fine with no errors:
- radeon r9 390
- rx 580
- hd 7770
I don't have any other gpus available to me to test any further.
Now after doing some research I know that there is a limit on how big your uniform buffer block can be, as well as how many and how many components you can have etc. However, considering I brought the max number of layers down to 1 and tested again, the problem still persisted. It also can't be a memory shortage issue because an hd 7770 has only 1 GB of vram, while a 1060 has between 4-6 GB of vram and it still happens on a 1060.
Another thing I found is that shader variables can be optimized out by the driver when the variable does not contribute to the output, and as you will be able to see from the shader code it deffinitely does contribute to the output.
So, onto some code
In the material class there is a CreateUniformBuffer function that gets the index of the uniform buffer block and binds it to a buffer allowing to edit it. Note that the program already crashes after getting INVALID_INDEX so getting the index must be where the issue lies.
GLuint Material::CreateUniformBuffer(const std::string& name, GLuint bufferSize)
{
GLuint uniformBlockIndex = glGetUniformBlockIndex(m_pShader->m_ShaderProgramID, name.data());
Utilities::Debug::LogGLError(glGetError());
if (uniformBlockIndex == GL_INVALID_INDEX)
{
Utilities::Debug::LogError("Material::CreateUniformBuffer > Uniform buffer block with name " + name + " not found!");
return 0;
}
Utilities::Debug::LogGLError(glGetError());
glUniformBlockBinding(m_pShader->m_ShaderProgramID, uniformBlockIndex, m_BufferBindIndex);
Utilities::Debug::LogGLError(glGetError());
// Uniform buffer object for lights
GLuint bufferID;
glGenBuffers(1, &bufferID);
Utilities::Debug::LogGLError(glGetError());
glBindBuffer(GL_UNIFORM_BUFFER, bufferID);
Utilities::Debug::LogGLError(glGetError());
glBufferData(GL_UNIFORM_BUFFER, bufferSize, NULL, GL_DYNAMIC_DRAW);
Utilities::Debug::LogGLError(glGetError());
glBindBufferBase(GL_UNIFORM_BUFFER, uniformBlockIndex, bufferID);
Utilities::Debug::LogGLError(glGetError());
glBindBuffer(GL_UNIFORM_BUFFER, 0);
Utilities::Debug::LogGLError(glGetError());
m_UniformBufferObjects.push_back(bufferID);
++m_BufferBindIndex;
return bufferID;
}
This is the tessellation evaluation shader, note that there is an #include "SimplexNoise" line at the top, which does not work in opengl, however, the engine has a precompiler stage for shaders where it reads through the shader code and replaces any #include directives with the contents of the file that its including before compiling the shader.
planet_te.shader
#version 450
#include "SimplexNoise.shader"
layout(triangles, equal_spacing, cw) in;
in vec3 tcPosition[];
out vec3 tePosition;
out float teElevation;
uniform int NumNoiseLayers;
struct NoiseLayer
{
float Strength;
float BaseRoughness;
float Roughness;
float Persistance;
vec3 Center;
float MinValue;
int NumLayers;
int UseFirstLayerAsMask;
int NoiseFilterType;
float Weight;
};
const int MaxNoiseLayers = 4;
layout(std140) uniform NoiseBlock
{
NoiseLayer NoiseLayers[MaxNoiseLayers];
} _NoiseData;
float Evaluate(vec3 p, int layer)
{
int filterType = _NoiseData.NoiseLayers[layer].NoiseFilterType;
if (filterType == 0)
return SimpleEvaluate(p, int(_NoiseData.NoiseLayers[layer].NumLayers), _NoiseData.NoiseLayers[layer].BaseRoughness, _NoiseData.NoiseLayers[layer].Roughness, _NoiseData.NoiseLayers[layer].Persistance, _NoiseData.NoiseLayers[layer].Center, _NoiseData.NoiseLayers[layer].MinValue, _NoiseData.NoiseLayers[layer].Strength);
return RigidEvaluate(p, int(_NoiseData.NoiseLayers[layer].NumLayers), _NoiseData.NoiseLayers[layer].BaseRoughness, _NoiseData.NoiseLayers[layer].Roughness, _NoiseData.NoiseLayers[layer].Persistance, _NoiseData.NoiseLayers[layer].Center, _NoiseData.NoiseLayers[layer].MinValue, _NoiseData.NoiseLayers[layer].Strength, _NoiseData.NoiseLayers[layer].Weight);
}
float CalculateTotalStrength()
{
float strength = 0.0;
for (int i = 0; i < NumNoiseLayers; i++)
{
strength += _NoiseData.NoiseLayers[i].Strength;
}
return strength;
}
float LayeredEvaluate(vec3 p)
{
float firstLayerValue = 0.0;
float elevationAverage = 0.0;
float totalStrength = CalculateTotalStrength();
float unscaledElevation = 0.0;
float scaledElevation = 0.0;
float noiseValue = 0.0;
float strengthPercentage = 0.0;
if (NumNoiseLayers > 0)
{
unscaledElevation = Evaluate(p, 0);
scaledElevation = max(0.0, unscaledElevation);
noiseValue = scaledElevation;
elevationAverage = unscaledElevation;
firstLayerValue = noiseValue;
}
for (int i = 1; i < NumNoiseLayers; i++)
{
float mask = (_NoiseData.NoiseLayers[i].UseFirstLayerAsMask == 1) ? firstLayerValue : 1.0;
unscaledElevation = Evaluate(p, 0);
scaledElevation = max(0.0, unscaledElevation);
elevationAverage += unscaledElevation;
noiseValue += scaledElevation;
}
elevationAverage /= totalStrength;
teElevation = clamp(elevationAverage * 115.0, -0.99, 0.99);
return noiseValue;
}
void main()
{
vec3 p0 = gl_TessCoord.x * tcPosition[0];
vec3 p1 = gl_TessCoord.y * tcPosition[1];
vec3 p2 = gl_TessCoord.z * tcPosition[2];
tePosition = normalize(p0 + p1 + p2);
float hieght = LayeredEvaluate(tePosition);
gl_Position = vec4(tePosition * (1.0 + hieght), 1);
}
SimplexNoise.shader
const int RandomSize = 256;
const float Sqrt3 = 1.7320508075688772935;
const float Sqrt5 = 2.2360679774997896964;
uniform int _random[512];
/// Skewing and unskewing factors for 2D, 3D and 4D,
/// some of them pre-multiplied.
const float F2 = 0.5 * (Sqrt3 - 1.0);
const float G2 = (3.0 - Sqrt3) / 6.0;
const float G22 = G2 * 2.0 - 1;
const float F3 = 1.0 / 3.0;
const float G3 = 1.0 / 6.0;
const float F4 = (Sqrt5 - 1.0) / 4.0;
const float G4 = (5.0 - Sqrt5) / 20.0;
const float G42 = G4 * 2.0;
const float G43 = G4 * 3.0;
const float G44 = G4 * 4.0 - 1.0;
const int[] Grad3 =
{
1, 1, 0, -1, 1, 0, 1, -1, 0,
-1, -1, 0, 1, 0, 1, -1, 0, 1,
1, 0, -1, -1, 0, -1, 0, 1, 1,
0, -1, 1, 0, 1, -1, 0, -1, -1
};
float Dot(int index, float x, float y, float z)
{
return Grad3[index] * x + Grad3[index + 1] * y + Grad3[index + 2] * z;
}
float Dot(int index, float x, float y)
{
return Grad3[index] * x + Grad3[index + 1] * y;
}
int FastFloor(float x)
{
return int(x) >= 0 ? int(x) : int(x) - 1;
}
float Evaluate(vec3 p)
{
float x = p.x;
float y = p.y;
float z = p.z;
float n0 = 0.0, n1 = 0.0, n2 = 0.0, n3 = 0.0;
// Noise contributions from the four corners
// Skew the input space to determine which simplex cell we're in
float s = (x + y + z) * F3;
// for 3D
int i = FastFloor(x + s);
int j = FastFloor(y + s);
int k = FastFloor(z + s);
float t = (i + j + k) * G3;
// The x,y,z distances from the cell origin
float x0 = x - (i - t);
float y0 = y - (j - t);
float z0 = z - (k - t);
// For the 3D case, the simplex shape is a slightly irregular tetrahedron.
// Determine which simplex we are in.
// Offsets for second corner of simplex in (i,j,k)
int i1, j1, k1;
// coords
int i2, j2, k2; // Offsets for third corner of simplex in (i,j,k) coords
if (x0 >= y0)
{
if (y0 >= z0)
{
// X Y Z order
i1 = 1;
j1 = 0;
k1 = 0;
i2 = 1;
j2 = 1;
k2 = 0;
}
else if (x0 >= z0)
{
// X Z Y order
i1 = 1;
j1 = 0;
k1 = 0;
i2 = 1;
j2 = 0;
k2 = 1;
}
else
{
// Z X Y order
i1 = 0;
j1 = 0;
k1 = 1;
i2 = 1;
j2 = 0;
k2 = 1;
}
}
else
{
// x0 < y0
if (y0 < z0)
{
// Z Y X order
i1 = 0;
j1 = 0;
k1 = 1;
i2 = 0;
j2 = 1;
k2 = 1;
}
else if (x0 < z0)
{
// Y Z X order
i1 = 0;
j1 = 1;
k1 = 0;
i2 = 0;
j2 = 1;
k2 = 1;
}
else
{
// Y X Z order
i1 = 0;
j1 = 1;
k1 = 0;
i2 = 1;
j2 = 1;
k2 = 0;
}
}
// A step of (1,0,0) in (i,j,k) means a step of (1-c,-c,-c) in (x,y,z),
// a step of (0,1,0) in (i,j,k) means a step of (-c,1-c,-c) in (x,y,z),
// and
// a step of (0,0,1) in (i,j,k) means a step of (-c,-c,1-c) in (x,y,z),
// where c = 1/6.
// Offsets for second corner in (x,y,z) coords
float x1 = x0 - i1 + G3;
float y1 = y0 - j1 + G3;
float z1 = z0 - k1 + G3;
// Offsets for third corner in (x,y,z)
float x2 = x0 - i2 + F3;
float y2 = y0 - j2 + F3;
float z2 = z0 - k2 + F3;
// Offsets for last corner in (x,y,z)
float x3 = x0 - 0.5;
float y3 = y0 - 0.5;
float z3 = z0 - 0.5;
// Work out the hashed gradient indices of the four simplex corners
int ii = i & 0xff;
int jj = j & 0xff;
int kk = k & 0xff;
// Calculate the contribution from the four corners
float t0 = 0.6 - x0 * x0 - y0 * y0 - z0 * z0;
if (t0 > 0)
{
t0 *= t0;
int gi0 = _random[ii + _random[jj + _random[kk]]] % 12;
n0 = t0 * t0 * Dot(gi0 * 3, x0, y0, z0);
}
float t1 = 0.6 - x1 * x1 - y1 * y1 - z1 * z1;
if (t1 > 0)
{
t1 *= t1;
int gi1 = _random[ii + i1 + _random[jj + j1 + _random[kk + k1]]] % 12;
n1 = t1 * t1 * Dot(gi1 * 3, x1, y1, z1);
}
float t2 = 0.6 - x2 * x2 - y2 * y2 - z2 * z2;
if (t2 > 0)
{
t2 *= t2;
int gi2 = _random[ii + i2 + _random[jj + j2 + _random[kk + k2]]] % 12;
n2 = t2 * t2 * Dot(gi2 * 3, x2, y2, z2);
}
float t3 = 0.6 - x3 * x3 - y3 * y3 - z3 * z3;
if (t3 > 0)
{
t3 *= t3;
int gi3 = _random[ii + 1 + _random[jj + 1 + _random[kk + 1]]] % 12;
n3 = t3 * t3 * Dot(gi3 * 3, x3, y3, z3);
}
// Add contributions from each corner to get the final noise value.
// The result is scaled to stay just inside [-1,1]
return float(n0 + n1 + n2 + n3) * 32;
}
float Evaluate(vec3 p, float strength, float roughness, vec3 centre)
{
float noise = (Evaluate(p * roughness + centre) + 1.0) * 0.5;
return noise * strength;
}
float SimpleEvaluate(vec3 p, int numLayers, float baseRoughness, float roughness, float persistance, vec3 centre, float minValue, float strength)
{
float noiseValue = 0.0;
float frequency = baseRoughness;
float amplitude = 1.0;
for (int i = 0; i < numLayers; i++)
{
float v = Evaluate(p * frequency + centre);
noiseValue += (v + 1) * 0.5 * amplitude;
frequency *= roughness;
amplitude *= persistance;
}
//noiseValue = max(0.0, noiseValue - minValue);
return (noiseValue - minValue) * strength;
}
float RigidEvaluate(vec3 p, int numLayers, float baseRoughness, float roughness, float persistance, vec3 centre, float minValue, float strength, float weight)
{
float noiseValue = 0.0;
float frequency = baseRoughness;
float amplitude = 1.0;
weight = 1.0;
for (int i = 0; i < numLayers; i++)
{
float v = 1.0 - abs(Evaluate(p * frequency + centre));
v *= v;
v *= weight;
weight = v;
noiseValue += v * amplitude;
frequency *= roughness;
amplitude *= persistance;
}
//noiseValue = max(0.0, noiseValue - minValue);
return (noiseValue - minValue) * strength;
}
Note that this is a noise algorithm I found online and converted it to glsl code (shoutout to Sebastian Lague for his amazing series on procedural planets in Unity)
I am using SDL to open a window and to handle input, audio and textrendering, OpenGL 4.6 and GLEW 2.1.0
Things I've tried:
- Lower the size of the block by lowering the number of layers (so a smaller array)
- A uniform array instead of a uniform block (same outcome only the crash is not there, but for gpus that had the crash it just renders all planets incorrectly)
- Updating SDL and all its plugins to the latest version, and updating OpenGL to 4.6 from 3.1, also updating GLEW from 1.10.0 to 2.1.0
- Changing the name of the uniform block
- Playing around with amount of values inside the block (while respecting the size having to be a multiplication of 4 floats)
- Having binding = 0 next to the std140 in the glsl code
- Logging any errors or warnings from shader compilation to the console (no errors or warnings found)
I have a bit of a deadline next friday to upload a build to steam, and they won't except it if it only works on amd gpu's (understandable off course) so I'm hoping someone has an idea of what I'm doing wrong here, or has had a similar issue before in the past, any help is welcome!
EDIT: I tried using an SSBO, this had a harsh impact on performance, and while it does not crash anymore (because I'm not looking for the block index by name) it does not render anything on nvidea gpu's and spit out Unknown Errors in the log file.
Code for binding the SSBO
GLuint Material::CreateShaderStorageBuffer(const std::string& name, GLsizeiptr bufferSize, const void* data, GLint bindingIndex, GLenum usage)
{
GLuint ssbo;
glGenBuffers(1, &ssbo);
Utilities::Debug::LogGLError(glGetError());
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
Utilities::Debug::LogGLError(glGetError());
glBufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, data, usage);
Utilities::Debug::LogGLError(glGetError());
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, bindingIndex, ssbo);
Utilities::Debug::LogGLError(glGetError());
/*GLuint blockIndex = glGetProgramResourceIndex(m_pShader->m_ShaderProgramID, GL_SHADER_STORAGE_BLOCK, name.c_str());
Utilities::Debug::LogGLError(glGetError());
if (blockIndex == GL_INVALID_INDEX)
{
Utilities::Debug::LogError("Material::CreateShaderStorageBuffer > Shader Storage Buffer Block with name " + name + " not found!");
return 0;
}
glShaderStorageBlockBinding(m_pShader->m_ShaderProgramID, blockIndex, bindingIndex);
Utilities::Debug::LogGLError(glGetError());*/
glBindBuffer(GL_SHADER_STORAGE_BUFFER, NULL);
Utilities::Debug::LogGLError(glGetError());
return ssbo;
}
Code for setting SSBO data
void Material::WriteToShaderStorageBuffer(GLuint ssboID, const void* data, GLsizeiptr size)
{
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboID);
Utilities::Debug::LogGLError(glGetError());
GLvoid* bufferData = glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_WRITE_ONLY);
Utilities::Debug::LogGLError(glGetError());
memcpy(bufferData, data, size);
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
Utilities::Debug::LogGLError(glGetError());
glBindBuffer(GL_SHADER_STORAGE_BUFFER, NULL);
Utilities::Debug::LogGLError(glGetError());
}
The change in the shader to turn the uniform block into an SSBO
const int MaxNoiseLayers = 4;
layout(std430, binding = 0) buffer NoiseBlock
{
NoiseLayer NoiseLayers[MaxNoiseLayers];
} _NoiseData;
I figured out that the issue was caused by the int array buffer _random[512] in the SimplexNoise shader file, I'm not sure why this was causing issues on only nvidea gpu's but I am assuming it was some kind of limit it was exceeding on nvideas gpus that caused the entire shader to break without any warnings or errors.
I fixed it by turning this array into another UBO and bind it to index 1.

Get world coordinates from D3DXIntersectTri

I have a square area on which I have to determine where the mouse pointing.
With D3DXIntersectTri I can tell IF the mouse pointing on it, but I have trouble calculating the x,y,z coordinates.
The drawing from vertex buffer, which initialized with the vertices array:
vertices[0].position = D3DXVECTOR3(-10, 0, -10);
vertices[1].position = D3DXVECTOR3(-10, 0, 10);
vertices[2].position = D3DXVECTOR3( 10, 0, -10);
vertices[3].position = D3DXVECTOR3( 10, 0, -10);
vertices[4].position = D3DXVECTOR3(-10, 0, 10);
vertices[5].position = D3DXVECTOR3( 10, 0, 10);
I have this method so far, this is not giving me the right coordinates (works only on a small part of the area, near two of the edges and more less accurate inside):
BOOL Area::getcoord( Ray& ray, D3DXVECTOR3& coord)
{
D3DXVECTOR3 rayOrigin, rayDirection;
rayDirection = ray.direction;
rayOrigin = ray.origin;
float d;
D3DXMATRIX matInverse;
D3DXMatrixInverse(&matInverse, NULL, &matWorld);
// Transform ray origin and direction by inv matrix
D3DXVECTOR3 rayObjOrigin,rayObjDirection;
D3DXVec3TransformCoord(&rayOrigin, &rayOrigin, &matInverse);
D3DXVec3TransformNormal(&rayDirection, &rayDirection, &matInverse);
D3DXVec3Normalize(&rayDirection,&rayDirection);
float u, v;
BOOL isHit1, isHit2;
D3DXVECTOR3 p1, p2, p3;
p1 = vertices[3].position;
p2 = vertices[4].position;
p3 = vertices[5].position;
isHit1 = D3DXIntersectTri(&p1, &p2, &p3, &rayOrigin, &rayDirection, &u, &v, &d);
isHit2 = FALSE;
if(!isHit1)
{
p1 = vertices[0].position;
p2 = vertices[1].position;
p3 = vertices[2].position;
isHit2 = D3DXIntersectTri(&p1, &p2, &p3, &rayOrigin, &rayDirection, &u, &v, &d);
}
if(isHit1)
{
coord.x = 1 * ((1-u-v)*p3.x + u*p3.y + v*p3.z);
coord.y = 0.2f;
coord.z = -1 * ((1-u-v)*p1.x + u*p1.y + v*p1.z);
D3DXVec3TransformCoord(&coord, &coord, &matInverse);
}
if(isHit2)
{
coord.x = -1 * ((1-u-v)*p3.x + u*p3.y + v*p3.z);
coord.y = 0.2f;
coord.z = 1 * ((1-u-v)*p1.x + u*p1.y + v*p1.z);
D3DXVec3TransformCoord(&coord, &coord, &matWorld);
}
return isHit1 || isHit2;
}
Barycentric coordinates don't work the way you used them. u and v define the weight of the source vectors. So if you want to calculate the hit point, you will have to compute
coord = u * p1 + v * p2 + (1 - u - v) * p3
Alternatively you can use the d ray parameter:
coord = rayOrigin + d * rDirection
Both ways should result in the same coordinate.

gluDisk rotation for mapping

I'm trying to create sub-cursor for terrain mapping.
Basic by code: (old image, but rotation is same)
image http://www.sdilej.eu/pics/274a90360f9c46e2eaf94e095e0b6223.png
This is when i testing change glRotate ax to my numbers:
image2 http://www.sdilej.eu/pics/146bda9dc51708da54b9249706f874fc.png
What i want:
image3 http://www.sdilej.eu/pics/69721aa237608b423b635945d430e561.png
My code:
void renderDisk(float x1, float y1, float z1, float x2, float y2, float z2, float radius, int subdivisions, GLUquadricObj* quadric)
{
float vx = x2 - x1;
float vy = y2 - y1;
float vz = z2 - z1;
//handle the degenerate case of z1 == z2 with an approximation
if( vz == 0.0f )
vz = .0001f;
float v = sqrt( vx*vx + vy*vy + vz*vz );
float ax = 57.2957795f * acos( vz/v );
if(vz < 0.0f)
ax = -ax;
float rx = -vy * vz;
float ry = vx * vz;
glPushMatrix();
glTranslatef(x1, y1, z1);
glRotatef(ax, rx, ry, 0.0);
gluQuadricOrientation(quadric, GLU_OUTSIDE);
gluDisk(quadric, radius - 0.25, radius + 5.0, subdivisions, 5);
glPopMatrix();
}
void renderDisk_convenient(float x, float y, float z, float radius, int subdivisions)
{
// Mouse opacity
glColor4f( 0.0f, 7.5f, 0.0f, 0.5f );
GLUquadricObj* quadric = gluNewQuadric();
gluQuadricDrawStyle(quadric, GLU_LINE);
gluQuadricNormals(quadric, GLU_SMOOTH);
gluQuadricTexture(quadric, GL_TRUE);
renderDisk(x, y, z, x, y, z, radius, subdivisions, quadric);
gluDeleteQuadric(quadric);
}
renderDisk_convenient(posX, posY, posZ, radius, 20);
This is a simple one. In your call to renderDisk() you supply bad arguments. Looks like you copied the function from some tutorial without understanding how it works. The first three parameters control the center position, and the other three parameters control rotation using a second position which the disk is always facing. If the two positions are equal (which is your case), this line is executed:
//handle the degenerate case of z1 == z2 with an approximation
if( vz == 0.0f )
vz = .0001f;
And setting z to nonzero makes the disc perpendicular to XZ plane, which is also the horizontal plane for your terrain. So ... to make it okay, you need to modify your function like this:
void renderDisk_convenient(float x, float y, float z, float radius, int subdivisions)
{
// Mouse opacity
glColor4f( 0.0f, 7.5f, 0.0f, 0.5f );
GLUquadricObj* quadric = gluNewQuadric();
gluQuadricDrawStyle(quadric, GLU_LINE);
gluQuadricNormals(quadric, GLU_SMOOTH);
gluQuadricTexture(quadric, GL_TRUE);
float upX = 0, upY = 1, upZ = 0; // up vector (does not need to be normalized)
renderDisk(x, y, z, x + upX, y + upY, z + upZ, radius, subdivisions, quadric);
gluDeleteQuadric(quadric);
}
This should turn the disc into the xz plane so it will be okay if the terrain is flat. But in other places, you actually need to modify the normal direction (the (upX, upY, upZ) vector). If your terrain is generated from a heightmap, then the normal can be calculated using code such as this:
const char *p_s_heightmap16 = "ps_height_1k.png";
const float f_terrain_height = 50; // terrain is 50 units high
const float f_terrain_scale = 1000; // the longer edge of terrain is 1000 units long
TBmp *p_heightmap;
if(!(p_heightmap = p_LoadHeightmap_HiLo(p_s_heightmap16))) {
fprintf(stderr, "error: failed to load heightmap (%s)\n", p_s_heightmap16);
return false;
}
// load heightmap
TBmp *p_normalmap = TBmp::p_Alloc(p_heightmap->n_width, p_heightmap->n_height);
// alloc normalmap
const float f_width_scale = f_terrain_scale / max(p_heightmap->n_width, p_heightmap->n_height);
// calculate the scaling factor
for(int y = 0, hl = p_normalmap->n_height, hh = p_heightmap->n_height; y < hl; ++ y) {
for(int x = 0, wl = p_normalmap->n_width, wh = p_heightmap->n_width; x < wl; ++ x) {
Vector3f v_normal(0, 0, 0);
{
Vector3f v_pos[9];
for(int yy = -1; yy < 2; ++ yy) {
for(int xx = -1; xx < 2; ++ xx) {
int sx = xx + x;
int sy = yy + y;
float f_height;
if(sx >= 0 && sy >= 0 && sx < wh && sy < hh)
f_height = ((const uint16_t*)p_heightmap->p_buffer)[sx + sy * wh] / 65535.0f * f_terrain_height;
else
f_height = 0;
v_pos[(xx + 1) + 3 * (yy + 1)] = Vector3f(xx * f_width_scale, f_height, yy * f_width_scale);
}
}
// read nine-neighbourhood
/*
0 1 2
+----------+----------+
|\ | /|
| \ | / |
| \ | / |
| \ | / |
3|_________\|/_________|5
| 4/|\ |
| / | \ |
| / | \ |
| / | \ |
|/ | \|
+----------+----------+
6 7 8
*/
const int p_indices[] = {
0, 1, //4,
1, 2, //4,
2, 5, //4,
5, 8, //4,
8, 7, //4,
7, 6, //4,
6, 3, //4,
3, 0 //, 4
};
for(int i = 0; i < 8; ++ i) {
Vector3f a = v_pos[p_indices[i * 2]];
Vector3f b = v_pos[p_indices[i * 2 + 1]];
Vector3f c = v_pos[4];
// triangle
Vector3f v_tri_normal = (a - c).v_Cross(b - c);
v_tri_normal.Normalize();
// calculate normals
v_normal += v_tri_normal;
}
v_normal.Normalize();
}
// calculate normal from the heightmap (by averaging the normals of eight triangles that share the current point)
uint32_t n_normalmap =
0xff000000U |
(max(0, min(255, int(v_normal.z * 127 + 128))) << 16) |
(max(0, min(255, int(v_normal.y * 127 + 128))) << 8) |
max(0, min(255, int(-v_normal.x * 127 + 128)));
// calculate normalmap color
p_normalmap->p_buffer[x + wl * y] = n_normalmap;
// use the lightmap bitmap to store the results
}
}
(note this contains some structures and functions that are not included here so you won't be able to use this code directly, but the basic concept is there)
Once you have the normals, you need to sample normal under location (x, z) and use that in your function. This will still make the disc intersect the terrain where there is a steep slope next to flat surface (where the second derivative is high). In order to cope with that, you can either lift the cursor up a bit (along the normal), or disable depth testing.
If your terrain is polygonal, you could use vertex normals just as well, just take triangle that is below (x, y, z) and interpolate it's vertices normals to get the normal for the disc.
I hope this helps, feel free to comment if you need further advice ...