Related
Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 4 months ago.
Improve this question
I am currently comparing the implementation of a n-body simulation in the GPU using CUDA and OpenGL (Compute Shaders) for a project, but I run into a problem using shared memory.
First I implemented the version with no shared memory as follows:
CUDA
#include "helper_math.h"
//...
// Squared Euclidean distance between points A and B (no square root is taken).
__device__ float dist2(float3 A, float3 B)
{
    const float3 delta = A - B;
    return dot(delta, delta);
}
// Naive all-pairs gravity step: one thread per particle, every neighbor is read
// straight from `positions`. All particles share the same `mass`. Only
// `velocities` is written; `positions` is read-only here.
__global__ void n_body_vel_calc(float3* positions, float3* velocities,
                                unsigned numParticles, float mass, float deltaTime)
{
    const unsigned self = blockDim.x * blockIdx.x + threadIdx.x;
    if (self < numParticles)
    {
        const float G = 6.6743e-11f;
        const float3 own_position = positions[self];
        float3 total_force = make_float3(0.0f, 0.0f, 0.0f);
        for (unsigned other = 0; other < numParticles; ++other)
        {
            // A particle exerts no force on itself.
            if (other != self)
            {
                const float3 other_position = positions[other];
                const float inv_r2 = 1.0f / dist2(own_position, other_position);
                const float3 toward_other = normalize(other_position - own_position);
                total_force += G * mass * mass * inv_r2 * toward_other;
            }
        }
        const float3 accel = total_force / mass;
        velocities[self] += accel * deltaTime;
    }
}
OpenGL
// glBufferStorage(GL_SHADER_STORAGE_BUFFER, ..., ..., ...);
#version 460
layout(local_size_x=128) in;
layout(location = 0) uniform int numParticles;
layout(location = 1) uniform float mass;
layout(location = 2) uniform float dt;
// NOTE(review): under std430 every element of a vec3 array is aligned like a
// vec4 (16-byte stride) - confirm the host-side buffers use that layout.
layout(std430, binding=0) buffer pblock { vec3 positions[]; };
layout(std430, binding=1) buffer vblock { vec3 velocities[]; };

// Squared Euclidean distance between A and B.
float dist2(vec3 A, vec3 B)
{
    vec3 delta = A - B;
    return dot(delta, delta);
}

// Naive all-pairs gravity step: one invocation per particle, neighbors are
// read directly from the positions buffer; only velocities are written.
void main()
{
    int self = int(gl_GlobalInvocationID.x);
    if (self < numParticles)
    {
        const float G = 6.6743e-11f;
        vec3 own_position = positions[self];
        vec3 total_force = vec3(0.0);
        for (uint other = 0; other < numParticles; ++other)
        {
            // A particle exerts no force on itself.
            if (self != other)
            {
                vec3 other_position = positions[other];
                float inv_r2 = 1.0 / dist2(own_position, other_position);
                vec3 toward_other = normalize(other_position - own_position);
                total_force += G * mass * mass * inv_r2 * toward_other;
            }
        }
        vec3 accel = total_force / mass;
        velocities[self] += accel * dt;
    }
}
With the same number of threads per group, the same number of particles, and the same number of kernel executions, the CUDA version takes 82 ms and the OpenGL version takes 70 ms. It is odd that their speeds differ this much, but I can attribute that to GLSL having its geometric operations optimized somehow.
My problem comes next, when I write the versions with shared memory, which should increase the performance by not reading from global memory multiple times.
CUDA
// Tiled all-pairs gravity step. Each block stages `workgroupSize` positions at
// a time in shared memory and every thread accumulates the force on its own
// particle from the staged tile. Only `velocities` is written.
//
// The launch must supply workgroupSize * sizeof(float3) bytes of dynamic shared
// memory. NOTE(review): the tile is indexed by threadIdx.x, so this assumes
// blockDim.x == workgroupSize - confirm at the launch site.
__global__ void n_body_vel_calc(float3* positions, float3 * velocities, unsigned workgroupSize,
                                unsigned numParticles, float mass, float deltaTime)
{
    // size of array == workgroupSize
    extern __shared__ float3 temp_tile[];

    const unsigned i = blockDim.x * blockIdx.x + threadIdx.x;
    // Out-of-range threads must NOT return early: they still have to load their
    // tile slot and reach every __syncthreads() below, otherwise the barrier is
    // executed divergently (undefined behavior) and tile slots stay unwritten.
    const bool in_range = i < numParticles;

    const float G = 6.6743e-11f;
    const float3 zero = make_float3(0.0f, 0.0f, 0.0f);
    const float3 cur_position = in_range ? positions[i] : zero;
    float3 force = zero;

    for (unsigned tile = 0; tile < numParticles; tile += workgroupSize)
    {
        // Cooperative load; guarded so the last (partial) tile does not read
        // past the end of `positions`.
        const unsigned src = tile + threadIdx.x;
        temp_tile[threadIdx.x] = (src < numParticles) ? positions[src] : zero;
        __syncthreads();

        if (in_range)
        {
            for (unsigned j = 0; j < workgroupSize; ++j)
            {
                const unsigned neighbor = tile + j;   // global index of tile slot j
                if (neighbor >= numParticles)
                    break;                            // rest of the tile is padding
                if (neighbor == i)
                    continue;                         // skip self-interaction
                const float3 neighbor_position = temp_tile[j];
                const float inv_distance2 = 1.0f / dist2(cur_position, neighbor_position);
                const float3 direction = normalize(neighbor_position - cur_position);
                force += G * mass * mass * inv_distance2 * direction;
            }
        }
        // Nobody may overwrite the tile until every thread has finished reading it.
        __syncthreads();
    }

    if (in_range)
    {
        const float3 acceleration = force / mass;
        velocities[i] += acceleration * deltaTime;
    }
}
OpenGL
#version 460
layout(local_size_x=128) in;
layout(location = 0) uniform int numParticles;
layout(location = 1) uniform float mass;
layout(location = 2) uniform float dt;
// NOTE(review): under std430 every element of a vec3 array is aligned like a
// vec4 (16-byte stride) - confirm the host-side buffers use that layout.
layout(std430, binding=0) buffer pblock { vec3 positions[]; };
layout(std430, binding=1) buffer vblock { vec3 velocities[]; };
// Shared variables: one staged position per invocation of the work group.
shared vec3 temp_tile[gl_WorkGroupSize.x];

// Squared Euclidean distance between A and B (this shader calls it, so it has
// to be defined here as well).
float dist2(vec3 A, vec3 B)
{
    vec3 C = A - B;
    return dot( C, C );
}

// Tiled all-pairs gravity step: the work group stages gl_WorkGroupSize.x
// positions at a time in shared memory; only velocities are written.
void main()
{
    uint self = gl_GlobalInvocationID.x;
    uint particleCount = uint(max(numParticles, 0));
    // Out-of-range invocations must not return early: barrier() has to be
    // reached in uniform control flow by the whole work group, and their tile
    // slot still has to be written.
    bool inRange = self < particleCount;

    const float G = 6.6743e-11f;
    vec3 cur_position = inRange ? positions[self] : vec3(0.0);
    vec3 force = vec3(0.0);

    for (uint tile = 0u; tile < particleCount; tile += gl_WorkGroupSize.x)
    {
        // Cooperative load, guarded against reading past the end on the last tile.
        uint src = tile + gl_LocalInvocationIndex;
        temp_tile[gl_LocalInvocationIndex] = (src < particleCount) ? positions[src] : vec3(0.0);
        groupMemoryBarrier();
        barrier();

        if (inRange)
        {
            for (uint j = 0u; j < gl_WorkGroupSize.x; ++j)
            {
                uint neighbor = tile + j;   // global index of tile slot j
                if (neighbor >= particleCount)
                    break;                  // rest of the tile is padding
                if (neighbor == self)
                    continue;               // skip self-interaction
                vec3 neighbor_position = temp_tile[j];
                float inv_distance2 = 1.0 / dist2(cur_position, neighbor_position);
                vec3 direction = normalize(neighbor_position - cur_position);
                force += G * mass * mass * inv_distance2 * direction;
            }
        }
        // The tile may only be overwritten after everyone has finished reading it.
        groupMemoryBarrier();
        barrier();
    }

    if (inRange)
    {
        vec3 acceleration = force / mass;
        velocities[self] += acceleration * dt;
    }
}
My principal problem comes next. With the same parameters as above, the CUDA version increases its execution time to 128 ms (greatly diminishing its performance), and the OpenGL one took 68 (a small improvement over the other version).
I have compiled the CUDA version with the toolkit version 11.7 and 10.0 with MSVC V143 and V142 and the results are more or less the same.
Why is the OpenGL implementation faster with shared memory while the CUDA one is not? Am I missing something?
I have a 3D Model in an OpenGL (C++) loaded using Assimp.
I need to move this model around the screen (translate) like it is always facing the camera in the X and Y axis (no Z axis).
It would be like moving this model like it is 2D only (but of course if I rotate it, it would show the Z axis as well).
my render function is :
// Camera sits on the +Z axis and looks at the origin.
// (Identifier spelled camY to match its use in render().)
camX = camY = 0;
camZ = 5;
lookatX = lookatY = lookatZ = 0;
// Per-frame setup: uploads the view matrix, then appends a translation, a
// uniform scale and a rotation about the Y axis (in that order) to the model
// matrix.
// NOTE(review): translate/scale/rotate each multiply into modelMatrix and
// nothing in this function resets it - confirm it is reset elsewhere,
// otherwise the transforms accumulate from frame to frame.
void C3DModel::render(void)
{
    // Rotation angle in degrees; never changed inside this function.
    static float angleDeg = 0.0f;

    setCamera(camX, camY, camZ, lookatX, lookatY, lookatZ);

    translate(-3, 1, 0); // here is the issue .
    scale(scaleFactor, scaleFactor, scaleFactor);
    rotate(angleDeg, 0.0f, 1.0f, 0.0f);
}
// Multiplies the model matrix on the right by a translation of (x, y, z) and
// uploads the result to the uniform buffer.
void C3DModel::translate(float x, float y, float z)
{
    float translation[16];
    setTranslationMatrix(translation, x, y, z);

    multMatrix(modelMatrix, translation);
    setModelMatrix();
}
// Fills the 16-float matrix `mat` with a translation by (x, y, z): identity
// plus the offsets in elements 12, 13 and 14.
void C3DModel::setTranslationMatrix(float *mat, float x, float y, float z)
{
    setIdentityMatrix(mat, 4);

    const float offsets[3] = { x, y, z };
    for (int axis = 0; axis < 3; ++axis)
        mat[12 + axis] = offsets[axis];
}
// Fills the 16-float matrix `mat` with a scale by (sx, sy, sz): identity with
// the first three diagonal entries (elements 0, 5, 10) replaced.
void C3DModel::setScaleMatrix(float *mat, float sx, float sy, float sz)
{
    setIdentityMatrix(mat, 4);

    const float factors[3] = { sx, sy, sz };
    for (int axis = 0; axis < 3; ++axis)
        mat[axis * 5] = factors[axis];
}
void C3DModel::setRotationMatrix(float *mat, float angle, float x, float y, float z)
{
float radAngle = DegToRad(angle);
float co = cos(radAngle);
float si = sin(radAngle);
float x2 = x * x;
float y2 = y * y;
float z2 = z * z;
mat[0] = x2 + (y2 + z2) * co;
mat[4] = x * y * (1 - co) - z * si;
mat[8] = x * z * (1 - co) + y * si;
mat[12] = 0.0f;
mat[1] = x * y * (1 - co) + z * si;
mat[5] = y2 + (x2 + z2) * co;
mat[9] = y * z * (1 - co) - x * si;
mat[13] = 0.0f;
mat[2] = x * z * (1 - co) - y * si;
mat[6] = y * z * (1 - co) + x * si;
mat[10] = z2 + (x2 + y2) * co;
mat[14] = 0.0f;
mat[3] = 0.0f;
mat[7] = 0.0f;
mat[11] = 0.0f;
mat[15] = 1.0f;
}
// Multiplies the model matrix on the right by a rotation of `angle` degrees
// about the axis (x, y, z) and uploads the result to the uniform buffer.
void C3DModel::rotate(float angle, float x, float y, float z)
{
    float rotation[16];
    setRotationMatrix(rotation, angle, x, y, z);

    multMatrix(modelMatrix, rotation);
    setModelMatrix();
}
// Multiplies the model matrix on the right by a scale of (x, y, z) and uploads
// the result to the uniform buffer.
void C3DModel::scale(float x, float y, float z)
{
    float scaling[16];
    setScaleMatrix(scaling, x, y, z);

    multMatrix(modelMatrix, scaling);
    setModelMatrix();
}
// Writes a size x size identity matrix into `mat` (size * size floats).
void C3DModel::setIdentityMatrix(float *mat, int size)
{
    // Clear every element first.
    const int total = size * size;
    for (float *p = mat; p < mat + total; ++p)
        *p = 0.0f;

    // Diagonal elements are size + 1 apart.
    for (int d = 0, idx = 0; d < size; ++d, idx += size + 1)
        mat[idx] = 1.0f;
}
// In-place 4x4 product a = a * b. Element (row, col) is stored at index
// col * 4 + row. The product is built in a scratch matrix so `a` can be read
// while the result is being computed.
void C3DModel::multMatrix(float *a, float *b)
{
    float product[16];

    for (int col = 0; col < 4; ++col)
    {
        for (int row = 0; row < 4; ++row)
        {
            float sum = 0.0f;
            for (int k = 0; k < 4; ++k)
                sum += a[k * 4 + row] * b[col * 4 + k];
            product[col * 4 + row] = sum;
        }
    }

    memcpy(a, product, sizeof(product));
}
// Uploads modelMatrix into the matrices uniform buffer at ModelMatrixOffset.
void C3DModel::setModelMatrix()
{
glBindBuffer(GL_UNIFORM_BUFFER, matricesUniBuffer);
// Overwrite only the model-matrix slice (MatrixSize bytes) of the buffer.
glBufferSubData(GL_UNIFORM_BUFFER, ModelMatrixOffset, MatrixSize, modelMatrix);
// Unbind so later GL_UNIFORM_BUFFER calls do not touch this buffer by accident.
glBindBuffer(GL_UNIFORM_BUFFER, 0);
}
// res = a x b for 3-component vectors. Components are written in order
// 0, 1, 2, so `res` should not alias `a` or `b`.
void C3DModel::crossProduct(float *a, float *b, float *res)
{
    for (int k = 0; k < 3; ++k)
    {
        const int next = (k + 1) % 3;
        const int prev = (k + 2) % 3;
        res[k] = a[next] * b[prev] - b[next] * a[prev];
    }
}
// Normalize a vec3 in place. A zero-length vector has no direction and is left
// unchanged instead of being turned into NaNs by a division by zero.
void C3DModel::normalize(float *a)
{
    const float mag = sqrt(a[0] * a[0] + a[1] * a[1] + a[2] * a[2]);
    if (mag == 0.0f)
        return;

    a[0] /= mag;
    a[1] /= mag;
    a[2] /= mag;
}
// Builds a look-at view matrix for a camera at (posX, posY, posZ) aimed at
// (lookAtX, lookAtY, lookAtZ), using +Y as the initial up hint, and uploads it
// to the matrices uniform buffer at ViewMatrixOffset.
void C3DModel::setCamera(float posX, float posY, float posZ, float lookAtX, float lookAtY, float lookAtZ)
{
    // Orthonormal camera basis: dir (forward), right, and a re-derived up.
    float dir[3] = { lookAtX - posX, lookAtY - posY, lookAtZ - posZ };
    float up[3] = { 0.0f, 1.0f, 0.0f };
    float right[3];

    normalize(dir);
    crossProduct(dir, up, right);
    normalize(right);
    crossProduct(right, dir, up);
    normalize(up);

    // Rotation part: the basis vectors become the rows (right, up, -dir).
    float viewMatrix[16];
    for (int k = 0; k < 3; ++k)
    {
        viewMatrix[4 * k + 0] = right[k];
        viewMatrix[4 * k + 1] = up[k];
        viewMatrix[4 * k + 2] = -dir[k];
        viewMatrix[4 * k + 3] = 0.0f;
    }
    viewMatrix[12] = 0.0f;
    viewMatrix[13] = 0.0f;
    viewMatrix[14] = 0.0f;
    viewMatrix[15] = 1.0f;

    // Then move the world so the camera position ends up at the origin.
    float eyeOffset[16];
    setTranslationMatrix(eyeOffset, -posX, -posY, -posZ);
    multMatrix(viewMatrix, eyeOffset);

    glBindBuffer(GL_UNIFORM_BUFFER, matricesUniBuffer);
    glBufferSubData(GL_UNIFORM_BUFFER, ViewMatrixOffset, MatrixSize, viewMatrix);
    glBindBuffer(GL_UNIFORM_BUFFER, 0);
}
What I would try is to separate the rotation of your object and the translation requested for your screen position into two different matrices.
At each frame, I would compute the rotation matrix with the code inside your C3DModel::setRotationMatrix and the translation matrix with C3DModel::setTranslationMatrix, combine them into a fresh model matrix and apply it to your object. Keep in mind that the order matters: if you rotate first, the object will turn around the origin defined in your obj file; if you rotate after the translation, it will rotate around the world origin (like a planet around the sun, where the sun is the origin).
In the end, it would looks like:
void C3DModel::render(void){
float* rotation = createRotation(angle, x, y, z);
float* translation = createTranslation(x, y, z);
float* updatedModel = mul(rotation, translation) //order matters
setModel(updatedModel);
}
I began to implement the depth of field in my application, but I ran into a problem. Artifacts appear in the form of a non-smooth transition between depths.
I'm doing the depth of field in the following way:
With the main scene rendering, I record the blur value in the alpha channel. I do this using this: fragColor.a = clamp(abs(focalDepth + fragPos.z) / focalRange, 0.0, 1.0), where focalDepth = 8, focalRange = 20.
After that I apply a two-step (horizontally and vertically) Gaussian blur with dynamic size and sigma, depending on the blur value (which I previously recorded in the alpha channel)(shader below)
But I have an artifact, where you see a clear transition between the depths.
The whole scene:
And with an increased scale:
My fragment blur shader:
#version 330
precision mediump float;
#define BLOOM_KERNEL_SIZE 8
#define DOF_KERNEL_SIZE 8
/* ^^^ definitions ^^^ */
layout (location = 0) out vec4 bloomFragColor;
layout (location = 1) out vec4 dofFragColor;
in vec2 texCoords;
uniform sampler2D image; // bloom
uniform sampler2D image2; // dof
uniform bool isHorizontal;
uniform float kernel[BLOOM_KERNEL_SIZE];
// Center + right half of the per-fragment DOF Gaussian (index 0 = center tap).
float dof_kernel[DOF_KERNEL_SIZE];
vec4 tmp;
vec3 bloom_result;
vec3 dof_result;
float fdof;
float dofSigma;
int dofSize;

// Builds a normalized Gaussian with `size` taps per side (center included) and
// stores its center and right half in dof_kernel[0 .. size-1].
// `size` must be in [1, DOF_KERNEL_SIZE].
void makeDofKernel(int size, float sigma) {
    size = size * 2 - 1;                       // full width: left + center + right
    float tmpKernel[DOF_KERNEL_SIZE * 2 - 1];
    int mean = size / 2;
    float sum = 0.0; // For accumulating the kernel values
    for (int x = 0; x < size; x++) {
        // Square by multiplication: pow() is undefined for a negative base,
        // and d is negative for every tap left of the center.
        float d = float(x - mean) / sigma;
        tmpKernel[x] = exp(-0.5 * d * d);
        // Accumulate the kernel values
        sum += tmpKernel[x];
    }
    // Normalize the kernel
    for (int x = 0; x < size; x++)
        tmpKernel[x] /= sum;
    // need center and right part
    for (int i = 0; i < mean + 1; i++) dof_kernel[i] = tmpKernel[mean + i];
}

// One separable blur pass (horizontal or vertical) for both the bloom texture
// and the DOF texture; the DOF blur amount is read from the alpha channel.
void main() {
    vec2 texOffset = 1.0 / textureSize(image, 0); // gets size of single texel
    tmp = texture(image2, texCoords);
    fdof = tmp.a;
    dofSize = clamp(int(tmp.a * DOF_KERNEL_SIZE), 1, DOF_KERNEL_SIZE);
    if (dofSize % 2 == 0) dofSize++;
    // The bump above can produce DOF_KERNEL_SIZE + 1, which would index past
    // the end of dof_kernel and tmpKernel; cap it at the array capacity.
    dofSize = min(dofSize, DOF_KERNEL_SIZE);
    makeDofKernel(dofSize, 12.0 * fdof + 1.0);

    bloom_result = texture(image, texCoords).rgb * kernel[0]; // current fragment's contribution
    dof_result = tmp.rgb * dof_kernel[0];

    // One-texel step along the axis blurred by this pass.
    vec2 stepDir = isHorizontal ? vec2(texOffset.x, 0.0) : vec2(0.0, texOffset.y);

    for (int i = 1; i < kernel.length(); i++) {
        vec2 offset = stepDir * float(i);
        bloom_result += texture(image, texCoords + offset).rgb * kernel[i];
        bloom_result += texture(image, texCoords - offset).rgb * kernel[i];
    }
    for (int i = 1; i < dofSize; i++) {
        vec2 offset = stepDir * float(i);
        dof_result += texture(image2, texCoords + offset).rgb * dof_kernel[i];
        dof_result += texture(image2, texCoords - offset).rgb * dof_kernel[i];
    }

    bloomFragColor = vec4(bloom_result, 1.0);
    dofFragColor = vec4(dof_result, fdof);
}
And the settings for the DOF texture: glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, SCR_W, SCR_H, 0, GL_RGBA, GL_FLOAT, NULL)
Optimization of the shader I'll do later, now I'm very concerned about this artifact. How it can be eliminated? It is desirable not to change the way of realization of the depth of field. But if you know a more productive way - a big request to share it.
I will be grateful for help.
The problem is solved. My mistake was that I changed the size of DOF blur kernel, although I had to change only the sigma. Corrected shader code:
#version 330
precision mediump float;
#define BLOOM_KERNEL_SIZE 8
#define DOF_KERNEL_SIZE 8
/* ^^^ definitions ^^^ */
layout (location = 0) out vec4 bloomFragColor;
layout (location = 1) out vec4 dofFragColor;
in vec2 texCoords;
uniform sampler2D image; // bloom
uniform sampler2D image2; // dof
uniform bool isHorizontal;
uniform float max_sigma = 12.0;
uniform float min_sigma = 0.0001;
uniform float kernel[BLOOM_KERNEL_SIZE];
// Center + right half of the per-fragment DOF Gaussian (index 0 = center tap).
float dof_kernel[DOF_KERNEL_SIZE];
vec4 tmp;
vec3 bloom_result;
vec3 dof_result;
float fdof;
const int DOF_LCR_SIZE = DOF_KERNEL_SIZE * 2 - 1; // left-center-right (lllcrrr)
const int DOF_MEAN = DOF_LCR_SIZE / 2;

// Fills dof_kernel with the center and right half of a Gaussian of fixed width
// (DOF_LCR_SIZE taps) and the given sigma, normalized over the full width.
void makeDofKernel(float sigma) {
    float total = 0; // right half including the center tap
    for (int k = 0; k < DOF_KERNEL_SIZE; k++) {
        dof_kernel[k] = exp(-0.5 * pow(k / sigma, 2.0));
        total += dof_kernel[k];
    }
    // The left half mirrors the right one; count the center tap only once.
    total += total - dof_kernel[0];
    // Normalize the kernel
    for (int k = 0; k < DOF_KERNEL_SIZE; k++) dof_kernel[k] /= total;
}

// One separable blur pass (horizontal or vertical) for both the bloom texture
// and the DOF texture. The DOF kernel size is fixed; only its sigma follows
// the blur amount stored in the alpha channel.
void main() {
    vec2 texOffset = 1.0 / textureSize(image, 0); // gets size of single texel
    tmp = texture(image2, texCoords);
    fdof = tmp.a;
    makeDofKernel(max_sigma * fdof + min_sigma);

    bloom_result = texture(image, texCoords).rgb * kernel[0]; // current fragment's contribution
    dof_result = tmp.rgb * dof_kernel[0];

    // One-texel step along the axis blurred by this pass.
    vec2 stepDir = isHorizontal ? vec2(texOffset.x, 0.0) : vec2(0.0, texOffset.y);

    for (int i = 1; i < BLOOM_KERNEL_SIZE; i++) {
        vec2 offset = stepDir * float(i);
        bloom_result += texture(image, texCoords + offset).rgb * kernel[i];
        bloom_result += texture(image, texCoords - offset).rgb * kernel[i];
    }
    for (int i = 1; i < DOF_KERNEL_SIZE; i++) {
        vec2 offset = stepDir * float(i);
        dof_result += texture(image2, texCoords + offset).rgb * dof_kernel[i];
        dof_result += texture(image2, texCoords - offset).rgb * dof_kernel[i];
    }

    bloomFragColor = vec4(bloom_result, 1.0);
    dofFragColor = vec4(dof_result, fdof);
}
Result:
I add a Sprite as background.
Now I want my Sprite to gradually become blurred.
I think I may modify the Texture2D to do the job, but it seems that Texture2D can not be modified.
So, what should I do?
You can use shader for that. You can get simple blur shader from cocos test project, like this:
#ifdef GL_ES
precision mediump float;
#endif
varying vec4 v_fragmentColor;
varying vec2 v_texCoord;
uniform vec2 resolution;   // texture size in pixels
uniform float blurRadius;  // blur radius in pixels; <= 0 disables the blur
uniform float sampleNum;   // samples per half-axis; <= 1 disables the blur
vec4 blur(vec2);

// Outputs the blurred texel tinted by the vertex color.
void main(void)
{
    gl_FragColor = blur(v_texCoord) * v_fragmentColor;
}

// Weighted average of the texture over a square window around p. The weight
// of a sample falls off linearly with its distance from p along each axis.
vec4 blur(vec2 p)
{
    // Blur disabled: plain texture lookup.
    if (!(blurRadius > 0.0 && sampleNum > 1.0))
    {
        return texture2D(CC_Texture0, p);
    }

    vec2 unit = 1.0 / resolution.xy;   // size of one pixel in texture coordinates
    float r = blurRadius;
    float sampleStep = r / sampleNum;
    vec4 weightedSum = vec4(0);
    float weightTotal = 0.0;
    for (float dx = -r; dx < r; dx += sampleStep)
    {
        for (float dy = -r; dy < r; dy += sampleStep)
        {
            float weight = (r - abs(dx)) * (r - abs(dy));
            weightedSum += texture2D(CC_Texture0, p + vec2(dx, dy) * unit) * weight;
            weightTotal += weight;
        }
    }
    return weightedSum / weightTotal;
}
If you don't know how to add custom shader to your sprite - here is an example!
You extend Sprite class:
// Sprite drawn through a blur fragment shader whose radius and sample count
// can be changed at runtime.
class MySpriteBlur : public Sprite {
public:
    ~MySpriteBlur();
    // Initializes the sprite and installs the blur shader program.
    bool initWithTexture(Texture2D* texture, const Rect& rect);
    // (Re)creates the shader program and sets its uniforms.
    void initGLProgram();
    // Creates an autoreleased instance from an image file; nullptr on failure.
    static MySpriteBlur *create(const char *pszFileName);
    void setBlurRadius(float radius);
    void setBlurSampleNum(float num);
protected:
    // Initialized in-class so neither member is ever read uninitialized; the
    // defaults match the values uploaded by initGLProgram().
    float _blurRadius = 0.0f;
    float _blurSampleNum = 7.0f;
};
And then implement it:
// Nothing to release beyond what the base class already handles.
MySpriteBlur::~MySpriteBlur() = default;
// Factory: allocates a sprite, initializes it from the given image file and
// hands it to the autorelease pool. Returns nullptr if allocation or
// initialization fails.
MySpriteBlur* MySpriteBlur::create(const char *pszFileName) {
    MySpriteBlur* sprite = new (std::nothrow) MySpriteBlur();
    if (!(sprite && sprite->initWithFile(pszFileName))) {
        CC_SAFE_DELETE(sprite);   // also resets the pointer
        return sprite;
    }
    sprite->autorelease();
    return sprite;
}
// Initializes the base sprite, then installs the blur shader. Returns false if
// the base-class initialization fails.
bool MySpriteBlur::initWithTexture(Texture2D* texture, const Rect& rect) {
    _blurRadius = 0;
    if (!Sprite::initWithTexture(texture, rect)) {
        return false;
    }
#if CC_ENABLE_CACHE_TEXTURE_DATA
    // Rebuild the shader program whenever the renderer is recreated.
    auto onRendererRecreated = [this](EventCustom* event) {
        initGLProgram();
    };
    auto listener = EventListenerCustom::create(EVENT_RENDERER_RECREATED, onRendererRecreated);
    _eventDispatcher->addEventListenerWithSceneGraphPriority(listener, this);
#endif
    initGLProgram();
    return true;
}
// Loads the blur fragment shader from "shaders/example_blur.fsh", attaches the
// resulting program state to the sprite and uploads the initial uniforms.
// NOTE(review): sampleNum is always reset to 7.0 here, so a value set through
// setBlurSampleNum() is lost when this runs again - confirm that is intended.
void MySpriteBlur::initGLProgram() {
    auto fileUtils = FileUtils::getInstance();
    std::string shaderPath = fileUtils->fullPathForFilename("shaders/example_blur.fsh");
    std::string fragSource = fileUtils->getStringFromFile(shaderPath);

    auto program = GLProgram::createWithByteArrays(ccPositionTextureColor_noMVP_vert, fragSource.c_str());
    setGLProgramState(GLProgramState::getOrCreateWithGLProgram(program));

    auto state = getGLProgramState();
    auto size = getTexture()->getContentSizeInPixels();
    state->setUniformVec2("resolution", size);
    state->setUniformFloat("blurRadius", _blurRadius);
    state->setUniformFloat("sampleNum", 7.0f);
}
// Stores the new blur radius and forwards it to the shader's "blurRadius" uniform.
void MySpriteBlur::setBlurRadius(float radius) {
    _blurRadius = radius;
    auto state = getGLProgramState();
    state->setUniformFloat("blurRadius", _blurRadius);
}
// Stores the new sample count and forwards it to the shader's "sampleNum" uniform.
void MySpriteBlur::setBlurSampleNum(float num) {
    _blurSampleNum = num;
    auto state = getGLProgramState();
    state->setUniformFloat("sampleNum", _blurSampleNum);
}
Hope that will help!
You have three options:
1) make a blurred background in photoshop (quick and simple, but extra size),
2) use a shader (not that simple and blur is a heavy operation),
3) redraw (on the fly) your background making it a new texture.
Here's my post how to draw on texture:
http://discuss.cocos2d-x.org/t/is-it-possible-to-erase-some-pixels-from-a-sprite/34460/5?u=piotrros
Knowing this here's a function from my project, which blurs one image (a data array) to another one:
// Box-blurs a 4-channel, 8-bit image of canvasWidth x canvasHeight pixels from
// inputData into outputData. Rows are addressed bottom-up. Samples outside the
// image are clamped to the nearest edge pixel.
//
// r is the blur radius in pixels; its fractional part is ignored and negative
// values are treated as 0 (plain copy). inputData and outputData must be
// different buffers, because every output pixel reads its input neighborhood.
void Sample::blur(unsigned char* inputData, unsigned char* outputData, float r) {
    const int radius = r > 0.0f ? static_cast<int>(r) : 0;
    // The window is (2 * radius + 1) pixels wide in each direction, so that is
    // the number of samples each channel sum has to be divided by.
    const int side = 2 * radius + 1;
    const int sampleCount = side * side;

    for (int i = 0; i < canvasHeight; i++) {
        for (int j = 0; j < canvasWidth; j++) {
            int sums[4] = { 0, 0, 0, 0 };

            for (int iy = i - radius; iy <= i + radius; iy++) {
                for (int ix = j - radius; ix <= j + radius; ix++) {
                    const int x = CLAMP(ix, 0, canvasWidth - 1);
                    const int y = CLAMP(iy, 0, canvasHeight - 1);
                    const int index = (x + (canvasHeight - y - 1) * canvasWidth) * 4;
                    for (int c = 0; c < 4; c++)
                        sums[c] += inputData[index + c];
                }
            }

            const int index2 = (j + (canvasHeight - i - 1) * canvasWidth) * 4;
            for (int c = 0; c < 4; c++)
                outputData[index2 + c] = static_cast<unsigned char>(sums[c] / sampleCount);
        }
    }
}
Just remember that blur is heavy and long operation so if you have a big image it may take a while.
I am trying to implement an omni-directional light source (a.k.a., point light source) in my raytracing program in C++. I am not getting the expected results, but I can't figure out the problem. Maybe someone can see what I am doing wrong.
I have included the two functions that are responsible for raytracing and the light. The ClosestIntersection function finds the closest intersection and a triangle. That is used later in the DirectLight function.
I would really appreciate any help.
#include <iostream>
#include <glm/glm.hpp>
#include <SDL.h>
#include "SDLauxiliary.h"
#include "TestModel.h"
#include "math.h"
using namespace std;
using glm::vec3;
using glm::mat3;
// ----------------------------------------------------------------------------
// GLOBAL VARIABLES
// Output image size in pixels.
const int SCREEN_WIDTH = 500;
const int SCREEN_HEIGHT = 500;
// Surface returned by InitializeSDL(); every pixel is drawn into it.
SDL_Surface* screen;
// SDL tick count (ms) at the previous Update() call, used for frame timing.
int t;
// Scene geometry, filled by LoadTestModel().
vector<Triangle> triangles;
// Z component of every primary ray direction built in Draw().
float focalLength = 900;
// Origin of every primary ray.
vec3 cameraPos(0, 0, -4.5);
// Point light position and power, used by DirectLight().
vec3 lightPos(0.5, 0.5, 0);
vec3 lightColor = 14.f * vec3(1,1,1);
// Translate camera
float translation = 0.1; // use this to set translation increment
// Rotate camera
float yaw;
vec3 trueCameraPos;
const float PI = 3.1415927;
// ----------------------------------------------------------------------------
// CLASSES
class Intersection;
// ----------------------------------------------------------------------------
// FUNCTIONS
void Update();
void Draw();
bool ClosestIntersection(vec3 start, vec3 dir, const vector<Triangle>& triangles,
Intersection& closestIntersection);
vec3 DirectLight(const Intersection& i);
// ----------------------------------------------------------------------------
// STRUCTURES
// Result of a ray/scene query, filled in by ClosestIntersection().
struct Intersection
{
// Hit point; DirectLight() subtracts it from lightPos, so it is expected to be
// in the same coordinate system as lightPos.
vec3 position;
// Value used to rank hits along the ray; the smallest one wins.
float distance;
// Index into the global `triangles` vector of the triangle that was hit.
int triangleIndex;
};
// Sentinel "no hit yet" distance: ClosestIntersection() starts from this value
// and reports a hit only if the distance has changed.
float m = std::numeric_limits<float>::max();
// Program entry point: loads the test scene, opens the SDL window, then
// alternates Update() and Draw() until NoQuitMessageSDL() returns false.
int main(int argc, char* argv[])
{
LoadTestModel(triangles);
screen = InitializeSDL(SCREEN_WIDTH, SCREEN_HEIGHT);
t = SDL_GetTicks(); // Set start value for timer.
while (NoQuitMessageSDL())
{
Update();
Draw();
}
// Save the last rendered frame once the loop has ended.
SDL_SaveBMP(screen, "screenshot.bmp");
return 0;
}
// Prints the time in milliseconds since the previous call (or since the timer
// was started in main()) and restarts the timer.
void Update()
{
    // Compute frame time:
    int t2 = SDL_GetTicks();
    float dt = float(t2 - t);
    t = t2;
    cout << "Render time: " << dt << " ms." << endl;
}
void Draw()
{
if (SDL_MUSTLOCK(screen))
SDL_LockSurface(screen);
for (int y = 0; y<SCREEN_HEIGHT; ++y)
{
for (int x = 0; x < SCREEN_WIDTH; ++x)
{
vec3 start = cameraPos;
vec3 dir(x - SCREEN_WIDTH / 2, y - SCREEN_HEIGHT / 2, focalLength);
Intersection intersection;
if (ClosestIntersection(start, dir, triangles, intersection))
{
//vec3 theColor = triangles[intersection.triangleIndex].color;
vec3 theColor = DirectLight(intersection);
PutPixelSDL(screen, x, y, theColor);
}
else
{
vec3 color(0, 0, 0);
PutPixelSDL(screen, x, y, color);
}
}
}
if (SDL_MUSTLOCK(screen))
SDL_UnlockSurface(screen);
SDL_UpdateRect(screen, 0, 0, 0, 0);
}
// Finds the closest triangle hit by the ray s + t * d (t >= 0).
//
// For each triangle with vertex v0 and edges e1 = v1 - v0, e2 = v2 - v0 it
// solves  [-d e1 e2] * (t, u, v)^T = s - v0  with Cramer's rule. The ray hits
// the triangle when t >= 0, u >= 0, v >= 0 and u + v <= 1.
//
// On success returns true and fills closestIntersection with:
//   position      - the hit point in world coordinates (s + t * d),
//   distance      - the ray parameter t (in units of |d|, not a true length),
//   triangleIndex - index of the hit triangle.
// Returns false when nothing is hit; distance is then left at the sentinel m.
bool ClosestIntersection(vec3 s, vec3 d,
    const vector<Triangle>& triangles, Intersection& closestIntersection)
{
    closestIntersection.distance = m;
    bool found = false;

    for (size_t i = 0; i < triangles.size(); i++)
    {
        const vec3 v0 = triangles[i].v0;
        const vec3 e1 = triangles[i].v1 - v0;
        const vec3 e2 = triangles[i].v2 - v0;
        const vec3 b = s - v0;

        // Determinant of A = [-d e1 e2] as a scalar triple product.
        const vec3 e1xe2 = glm::cross(e1, e2);
        const float det = glm::dot(-d, e1xe2);
        if (det == 0.0f)
            continue;   // ray parallel to the triangle plane, or degenerate triangle

        // Cramer's rule for t; reject hits behind the origin or farther than
        // the best hit found so far before computing u and v.
        const float tRay = glm::dot(b, e1xe2) / det;
        if (!(tRay >= 0.0f && tRay < closestIntersection.distance))
            continue;

        // Cramer's rule for u and v.
        const float u = glm::dot(-d, glm::cross(b, e2)) / det;
        const float v = glm::dot(-d, glm::cross(e1, b)) / det;

        if (u >= 0 && v >= 0 && u + v <= 1)
        {
            // Store the actual hit point, not the (t, u, v) solution: the
            // latter lives in a per-triangle coordinate system and cannot be
            // compared with lightPos.
            closestIntersection.position = s + tRay * d;
            closestIntersection.distance = tRay;
            closestIntersection.triangleIndex = int(i);
            found = true;
        }
    }
    return found;
}
// Direct illumination at intersection `i` from the point light: lightColor
// scaled by the clamped cosine between the surface normal and the direction to
// the light, divided by the sphere area 4 * PI * r^2 at the light's distance.
vec3 DirectLight(const Intersection& i)
{
    const vec3 toLight = lightPos - i.position;
    const float distSq = toLight.x * toLight.x + toLight.y * toLight.y + toLight.z * toLight.z;

    const vec3 normal = triangles[i.triangleIndex].normal;
    const float cosTheta = fmaxf((glm::dot(glm::normalize(toLight), normal)), 0);

    const float sphereArea = 4 * PI * distSq;
    return (lightColor * cosTheta) / sphereArea;
}
If I'm understanding the code in ClosestIntersection correctly, here's what it's doing for each triangle:
Let u,v be the vectors from one vertex of the triangle to the other two vertices. Let d be (the reverse of) the direction of the ray we're considering.
And let b be the vector from that vertex of the triangle to the camera.
Find p,q,r so that b = pd+qu+rv (p,q,r are what your code calls x.x, x.y, x.z).
Now the ray meets the triangle if p>0, q>=0, r>=0, q+r<=1 and the distance to the intersection point is p.
So, the conditions on q,r make sense; the idea is that b-qu-rv is the vector from the camera to the relevant point in the triangle and it's in direction d. Your distances aren't really distances, but along a single ray they're the same multiple of the actual distance, which means that this works fine for determining which triangle you've hit, and that's all you use them for. So far, so good.
But then you say closestIntersection.position = x; and surely that's all wrong, because this x isn't in the same coordinate system as your camera location, triangle vertices, etc. It's in this funny "how much of d, how much of u, how much of v" coordinate system which isn't even the same from one triangle to the next. (Which is why you are getting discontinuities at triangle boundaries even within a single face, I think.)
Try setting it to v0+x.y*(v1-v0)+x.z*(v2-v0) instead (I think this is right; it's meant to be the actual point where the ray crosses the triangle, in the same coordinates as all your other points) and see what it does.
This isn't a super-great answer, but I managed to make your code work without the strange shading discontinuities. The problem happens in ClosestIntersection and maybe Gareth's answer covers it. I need to stop looking at this now, but I wanted to show you what I have before I leave, and I need an Answer to post some code.
// This starts with some vec3 helper functions which make things
// easier to look at
// Dot product of a and b.
float Dot(const vec3& a, const vec3& b) {
    return a.x * b.x + a.y * b.y + a.z * b.z;
}

// Cross product a x b.
vec3 Cross(const vec3& a, const vec3& b) {
    const float cx = a.y*b.z - a.z*b.y;
    const float cy = a.z*b.x - a.x*b.z;
    const float cz = a.x*b.y - a.y*b.x;
    return vec3(cx, cy, cz);
}

// Squared length of v.
float L2(const vec3& v) {
    return Dot(v, v);
}

// Length of v.
float Abs(const vec3& v) {
    return std::sqrt(L2(v));
}
// Here is the replacement version of ClosestIntersection
// Here is the replacement version of ClosestIntersection.
//
// Finds the closest triangle hit by the ray cam + r * dir (r >= 0), using the
// plane-intersection + parametric-coordinates method (Dan Sunday,
// http://geomalgorithms.com/a06-_intersect-2.html).
//
// On success returns true and fills closestIntersection with the world-space
// hit point, its Euclidean distance from cam and the triangle index. Returns
// false when nothing is hit; distance is then left at the sentinel m.
bool ClosestIntersection(vec3 cam, vec3 dir,
    const vector<Triangle>& triangles, Intersection& closestIntersection)
{
    closestIntersection.distance = m;

    for (size_t i = 0; i < triangles.size(); ++i) {
        const vec3 v0 = triangles[i].v0;
        const vec3 v1 = triangles[i].v1;
        const vec3 v2 = triangles[i].v2;

        const vec3 u = v1 - v0;
        const vec3 v = v2 - v0;
        const vec3 n = Cross(u, v);   // plane normal (not normalized)

        // Ray/plane intersection: cam + ri * dir lies in the triangle's plane.
        const float denom = Dot(n, dir);
        if (denom == 0.0f)
            continue;   // ray parallel to the plane, or degenerate triangle (n == 0)
        const float ri = Dot(n, (v0 - cam)) / denom;
        if (ri < 0.0f)
            continue;   // the plane is behind the ray origin
        const vec3 Pi = cam + ri * dir;

        // w = Pi - v0, solve w = s*u + t*v (s, t are parametric scalars):
        //   s = w . (n x v) / (u . (n x v))
        //   t = w . (n x u) / (v . (n x u))
        const vec3 w = Pi - v0;
        const vec3 nxv = Cross(n, v);
        const vec3 nxu = Cross(n, u);
        const float sParam = Dot(w, nxv) / Dot(u, nxv);
        const float tParam = Dot(w, nxu) / Dot(v, nxu);

        if (sParam >= 0 && tParam >= 0 && sParam + tParam <= 1) {
            const float dist = Abs(cam - Pi);
            if (dist < closestIntersection.distance) {
                closestIntersection.position = Pi;
                closestIntersection.distance = dist;
                closestIntersection.triangleIndex = int(i);
            }
        }
    }
    return closestIntersection.distance != m;
}
Good luck.