Troubles with Marching Cubes and Texture coordinates - c++

I'm implementing MC algorithm in OpenGL.
Everything went fine, until I reached the point with texture coordinates.
I can't figure out how to implement them!
My progress:
Edit:
What I want to archive is to put some textures on my generated MC triangles.
As far as I understand I need to tell OpenGL uv coordinates, but no idea how to calculate them.

A typical texture coordinate generation algorithms for marching cube algorithms is to use environment mapping.
In short you calculate the vertex-normal at each vertex by averaging the face normals of all adjecting faces, then discard the z-coordinate of the normal and use (x/2+0.5, y/2+0.5) as (u,v) texture-coordinates.
Set up a texture with a nice white spot in the middle and some structure filling the rest of of the texture and you get the terminator-two silver-robot kind of look.

I need to tell OpenGL uv coordinates, but no idea how to calculate them.
You're facing some big problem there: The topology of what comes out of MC can be anything. The topology of a texture in OpenGL is either a (hyper)torus (GL_TEXTURE_1D, GL_TEXTURE_2D, GL_TEXTURE_3D), or a sphere (GL_TEXTURE_CUBE_MAP).
So inevitably you have to cut your surface into so called maps. This is a nontrivial task, but a qood strategy is cutting along regions with high curvature. See the paper
“Least Squares Conformal Maps for Automatic Texture Atlas Generation”
Bruno Lévy, Sylvain Petitjean, Nicolas Ray and Jérome Maillot
http://alice.loria.fr/index.php/publications.html?Paper=lscm#2002
for the dirty details.

The first answer given is partly correct, except you also need to check which plane is best to project from instead of always projecting from the z plane, like this C# Unity example:
Vector2[] getUVs(Vector3 a, Vector3 b, Vector3 c)
{
Vector3 s1 = b - a;
Vector3 s2 = c - a;
Vector3 norm = Vector3.Cross(s1, s2).Normalize(); // the normal
norm.x = Mathf.Abs(norm.x);
norm.y = Mathf.Abs(norm.y);
norm.z = Mathf.Abs(norm.z);
Vector2[] uvs = new Vector2[3];
if (norm.x >= norm.z && norm.x >= norm.y) // x plane
{
uvs[0] = new Vector2(a.z, a.y);
uvs[1] = new Vector2(b.z, b.y);
uvs[2] = new Vector2(c.z, c.y);
}
else if (norm.z >= norm.x && norm.z >= norm.y) // z plane
{
uvs[0] = new Vector2(a.x, a.y);
uvs[1] = new Vector2(b.x, b.y);
uvs[2] = new Vector2(c.x, c.y);
}
else if (norm.y >= norm.x && norm.y >= norm.z) // y plane
{
uvs[0] = new Vector2(a.x, a.z);
uvs[1] = new Vector2(b.x, b.z);
uvs[2] = new Vector2(c.x, c.z);
}
return uvs;
}
Though it is better to do this on the GPU in a shader, especially if you are planning on having very dynamic voxels, such as in an infinitely generated world that's constantly generating around the player or a game with lots of digging and building involved, you wouldn't have to calculate the UVs each time and it's also less data you have to send to the GPU, so it is definitely faster than this. I modified a basic triplanar shader I found on the internet a while ago, unfortunately I wasn't able to find it again, but my modified version is basically a triplanar mapping shader except with no blending and it only samples once per pass, so it should be pretty much as fast as a basic unlit shader and looks exactly the same as the image above. I did this because the normal triplanar shader blending doesn't look good with textures like brick walls at 45 degree angles.
Shader "Triplanar (no blending)"
{
Properties
{
_DiffuseMap("Diffuse Map ", 2D) = "white" {}
_TextureScale("Texture Scale",float) = 1
}
SubShader
{
Tags { "RenderType" = "Opaque" }
LOD 200
CGPROGRAM
#pragma target 3.0
#pragma surface surf Lambert
sampler2D _DiffuseMap;
float _TextureScale;
struct Input
{
float3 worldPos;
float3 worldNormal;
};
void surf(Input IN, inout SurfaceOutput o)
{
IN.worldNormal.x = abs(IN.worldNormal.x);
IN.worldNormal.y = abs(IN.worldNormal.y);
IN.worldNormal.z = abs(IN.worldNormal.z);
if (IN.worldNormal.x >= IN.worldNormal.z && IN.worldNormal.x >= IN.worldNormal.y) // x plane
{
o.Albedo = tex2D(_DiffuseMap, IN.worldPos.zy / _TextureScale);
}
else if (IN.worldNormal.y >= IN.worldNormal.x && IN.worldNormal.y >= IN.worldNormal.z) // y plane
{
o.Albedo = tex2D(_DiffuseMap, IN.worldPos.xz / _TextureScale);
}
else if (IN.worldNormal.z >= IN.worldNormal.x && IN.worldNormal.z >= IN.worldNormal.y) // z plane
{
o.Albedo = tex2D(_DiffuseMap, IN.worldPos.xy / _TextureScale);
}
}
ENDCG
}
}
It ends up looking a lot like a cubemap, though I don't think this is technically a cubemap as we only use three faces, not six.
EDIT: I later realized that you may want it in the fragment shader like that but for my purposes it works exactly the same and would theoretically be faster in the vertex shader:
Shader "NewUnlitShader"
{
Properties
{
_MainTex ("Texture", 2D) = "white" {}
}
SubShader
{
Tags { "RenderType"="Opaque" }
LOD 100
Pass
{
CGPROGRAM
#pragma vertex vert
#pragma fragment frag
// make fog work
#pragma multi_compile_fog
#include "UnityCG.cginc"
struct appdata
{
float4 vertex : POSITION;
float3 normal : NORMAL;
};
struct v2f
{
float2 uv : TEXCOORD0;
UNITY_FOG_COORDS(1)
float4 vertex : SV_POSITION;
};
sampler2D _MainTex;
float4 _MainTex_ST;
v2f vert (appdata v)
{
v2f o;
o.vertex = UnityObjectToClipPos(v.vertex);
v.normal.x = abs(v.normal.x);
v.normal.y = abs(v.normal.y);
v.normal.z = abs(v.normal.z);
if (v.normal.x >= v.normal.z && v.normal.x >= v.normal.y) // x plane
{
o.uv = v.vertex.zy;
}
else if (v.normal.y >= v.normal.x && v.normal.y >= v.normal.z) // y plane
{
o.uv = v.vertex.xz;
}
else if (v.normal.z >= v.normal.x && v.normal.z >= v.normal.y) // z plane
{
o.uv = v.vertex.xy;
}
UNITY_TRANSFER_FOG(o, o.vertex);
return o;
}
fixed4 frag (v2f i) : SV_Target
{
// sample the texture
fixed4 col = tex2D(_MainTex, i.uv);
// apply fog
UNITY_APPLY_FOG(i.fogCoord, col);
return col;
}
ENDCG
}
}
}

Related

C++ raytracer bug

I've written a raytracer in C++. This is the snippet for calculating the diffuse component:
//diffuse component
color diffuse(0, 0, 0);
if (intrs.mat.diffuseness > 0)
{
for (auto &light : lights)
{
//define ray from hit object to light
ray light_dir(intrs.point, (light->point - intrs.point).normalize());
double nl = light_dir.direction*intrs.normal; //dot product
double diminish_coeff = 1.0;
double dist = intrs.point.sqrDistance(light->point);
//check whether it reaches the light
if (nl > 0)
{
for (int i = 0; i < (int)shapes.size(); ++i)
{
shape::intersection temp_intrs(shapes[i]->intersect(light_dir, shapes[i]->interpolate_normals));
if (temp_intrs.valid && temp_intrs.point.sqrDistance(intrs.point) < dist)
{
diminish_coeff *= shadow_darkness;
break;
}
}
}
diffuse += intrs.mat.diffuseness * intrs.mat.col * light->light_color * light->light_intensity * nl*diminish_coeff;
}
}
Of course, I can't post the entire code, but I think it should be clear what I'm doing here - intrs is the current intersection of a ray and object and shapes is a vector of all objects in the scene.
Colors are represented as RGB in the (0,1) range. Addition and multiplication of colors are simple memberwise addition and multiplication. Only when the raytracing is over, and I want to write into the image file, I multiply my colors by 255 and clamp to 255 if a component is larger than that.
Currently, there is one point light in the scene and it's white: color(1,1,1), intensity = 1.0.
This is my rendered image:
So, this is not right - the cupboard on the left is supposed to be green, and the box is supposed to be red.
Is there something obviously wrong with my implementation? I can't seem to figure it out. I'll post some more code if necessary.
It seems that your diffuse += line should be inside the if (nl > 0) condition, not outside it.
I found the problem. For some reason, my intrs.normal vector wasn't normalized. Thank you everyone for your help.

Optimizing a raytracing shader in GLSL

I have coded a voxelization based raytracer which is working as expected but is very slow.
Currently the raytracer code is as follows:
#version 430
//normalized positon from (-1, -1) to (1, 1)
in vec2 f_coord;
out vec4 fragment_color;
struct Voxel
{
vec4 position;
vec4 normal;
vec4 color;
};
struct Node
{
//children of the current node
int children[8];
};
layout(std430, binding = 0) buffer voxel_buffer
{
//last layer of the tree, the leafs
Voxel voxels[];
};
layout(std430, binding = 1) buffer buffer_index
{
uint index;
};
layout(std430, binding = 2) buffer tree_buffer
{
//tree structure
Node tree[];
};
layout(std430, binding = 3) buffer tree_index
{
uint t_index;
};
uniform vec3 camera_pos; //position of the camera
uniform float aspect_ratio; // aspect ratio of the window
uniform float cube_dim; //Dimenions of the voxelization cube
uniform int voxel_resolution; //Side length of the cube in voxels
#define EPSILON 0.01
// Detect whether a position is inside of the voxel with size size located at corner
bool inBoxBounds(vec3 corner, float size, vec3 position)
{
bool inside = true;
position-=corner;//coordinate of the position relative to the box coordinate system
//Test that all coordinates are inside the box, if any is outisde, the point is out the box
for(int i=0; i<3; i++)
{
inside = inside && (position[i] > -EPSILON);
inside = inside && (position[i] < size+EPSILON);
}
return inside;
}
//Get the distance to a box or infinity if the box cannot be hit
float boxIntersection(vec3 origin, vec3 dir, vec3 corner0, float size)
{
dir = normalize(dir);
vec3 corner1 = corner0 + vec3(size,size,size);//Oposite corner of the box
float coeffs[6];
//Calculate the intersaction coefficients with te 6 bonding planes
coeffs[0] = (corner0.x - origin.x)/(dir.x);
coeffs[1] = (corner0.y - origin.y)/(dir.y);
coeffs[2] = (corner0.z - origin.z)/(dir.z);
coeffs[3] = (corner1.x - origin.x)/(dir.x);
coeffs[4] = (corner1.y - origin.y)/(dir.y);
coeffs[5] = (corner1.z - origin.z)/(dir.z);
//by default the distance to the box is infinity
float t = 1.f/0.f;
for(uint i=0; i<6; i++){
//if the distance to a boxis negative, we set it to infinity as we cannot travel in the negative direction
coeffs[i] = coeffs[i] < 0 ? 1.f/0.f : coeffs[i];
//The distance is the minumum of the previous calculated distance and the current distance
t = inBoxBounds(corner0,size,origin+dir*coeffs[i]) ? min(coeffs[i],t) : t;
}
return t;
}
#define MAX_TREE_HEIGHT 11
int nodes[MAX_TREE_HEIGHT];
int levels[MAX_TREE_HEIGHT];
vec3 positions[MAX_TREE_HEIGHT];
int sp=0;
void push(int node, int level, vec3 corner)
{
nodes[sp] = node;
levels[sp] = level;
positions[sp] = corner;
sp++;
}
void main()
{
int count = 0; //count the iterations of the algorithm
vec3 r = vec3(f_coord.x, f_coord.y, 1.f/tan(radians(40))); //direction of the ray
r.y/=aspect_ratio; //modify the direction based on the windows aspect ratio
vec3 dir = r;
r += vec3(0,0,-1.f/tan(radians(40))) + camera_pos; //put the ray at the camera position
fragment_color = vec4(0);
int max_level = int(log2(voxel_resolution));//height of the tree
push(0,0,vec3(-cube_dim));//set the stack
float tc = 1.f; //initial color value, to be decreased whenever a voxel is hit
//tree variables
int level=0;
int node=0;
vec3 corner;
do
{
//pop from stack
sp--;
node = nodes[sp];
level = levels[sp];
corner = positions[sp];
//set the size of the current voxel
float size = cube_dim / pow(2,level);
//set the corners of the children
vec3 corners[] =
{corner, corner+vec3(0,0,size),
corner+vec3(0, size,0), corner+vec3(0,size,size),
corner+vec3(size,0,0), corner+vec3(size,0,size),
corner+vec3(size,size,0), corner+vec3(size,size,size)};
float coeffs[8];
for(int child=0; child<8; child++)
{
//Test non zero childs, zero childs are empty and thus should be discarded
coeffs[child] = tree[node].children[child]>0?
//Get the distance to your child if it's not empty or infinity if it's empty
boxIntersection(r, dir, corners[child], size) : 1.f/0.f;
}
int indices[8] = {0,1,2,3,4,5,6,7};
//sort the children from closest to farthest
for(uint i=0; i<8; i++)
{
for(uint j=i; j<8; j++)
{
if((coeffs[j] < coeffs[i]))
{
float swap = coeffs[i];
coeffs[i] = coeffs[j];
coeffs[j] = swap;
int iSwap = indices[i];
indices[i] = indices[j];
indices[j] = iSwap;
vec3 vSwap = corners[i];
corners[i] = corners[j];
corners[j] = vSwap;
}
}
}
//push to stack
for(uint i=7; i>=0; i--)
{
if(!isinf(coeffs[i]))
{
push(tree[node].children[indices[i]],
level+1, corners[i]);
}
}
count++;
}while(level < (max_level-1) && sp>0);
//set color
fragment_color = vec4(count)/100;
}
As it may not be fully clear what this does, let me explain.
We check ray-box intersections starting with a big cube. If we hit it we test intersection with the 8 cubes that compose it.
If we hit any fo those we check intersections with the 8 cubes that make up that cube.
In 2D this would look as follows:
In this case we have 4 layers, we check the big box first, then the ones colored in red, then the ones colored in green, and finally the ones colored in blue.
Printing out the number of times the raytracing step executed as a color (which is what the code snippet I have provided does)
results in the following image:
As you can see, most of the time the shader doesn't do more than 100 iterations.
However this shader takes 200 000 microseconds to execute on average in a gtx 1070.
Since the issue is not number of executions, my problem is likely to be on thread execution.
Does anyone know how I could optimize this code?
The biggest botttleneck seems to be the use of a stack.
If I run the same code without pushing to the stack (generating wrong output), there is a 10 fold improvement in runtime
It seems you test for intersection with the ray most of all voxels in each level of the octree. And sort them (by some distance) also in each level.
I propose another approach.
If the ray intersects with the bounding box (level 0 of the octree) it makes it at two faces of the box. Or in a corner or an edge, these are "corner" cases.
Finding the 3D ray-plane intersection can be done like here. Finding if the intersection is inside the face (quad) can be done by testing if the point is inside of one of the two triangles of the face, like here.
Get the farthest intersection I0 from the camera. Also let r be a unit vector of the ray in the direction I0 toward the camera.
Find the deepest voxel for I0 coordinates. This is the farthest voxel from the camera.
Now we want the exit-coordinates I0e for the ray in that voxel, through another face. While you could do again the calculations for all 6 faces, if your voxels are X,Y,X aligned and you define the ray in the same coordinates system as the octree, then calculae simplify a lot.
Apply a little displacement (e.g. a 1/1000 of the smallest voxel size) to I0e by the r unit vector of the ray: I1 = I0e + r/1000. Find the voxel to these I1. This is the next voxel in the sorted list of voxel-ray intersections.
Repeat finding I1e then I2 then I2e then I3 etc. until the bounding box is exited. The list of crossed voxels is sorted.
Working with the octree can be optimized depending on how you store its info: All possible nodes or just used. Nodes with data or just "pointers" to another container with the data. This is matter for another question.
The first thing that stands out is your box intersection function. Have a look at inigo quilez' procedural box function for a much faster version. Since your boxsize is uniform in all axes and you don't need outNormal, you can get an even lighter version. In essence, use maths instead of the brute force approach that tests each box plane.
Also, try to avoid temporary storage where possible. For example, the corners array could be computed on demand for each octree box. Of course, with the above suggestion, these will be changed to box centers.
Since nodes, levels and positions are always accessed together, try co-locating them in a new single struct and access them as a single unit.
Will look more later...
Thread execution on a GPU may be massively parallel, but that doesn’t mean that all threads run independently from one another. Groups of threads execute exactly the same instructions, the only difference is the input data. That means that branches and therefore loops can’t make a thread diverge in execution and therefore also not let them terminate early.
Your example shows the most extreme edge case of this: when there is a high likelyhood that in a group of threads all work that’s done is relevant to one thread only.
To alleviate this, you should try to reduce the difference in execution length (iterations in your case) for threads in a group (or in total). This can be done by setting a limit on the number of iterations per shader pass and rescheduling only those threads/pixels that need more iterations.

Custom shader - Unity - Diffuse+Lightmap+normal+cubemap

I've been working on a custom shader for a project using Unity 4.6 because Unity's shaders offers a great variety of options but not the one i'm looking for.
I've looked on Stackoverflow about my shader's issue, but every question is about tricky and higly technical issue using shader. I think mine is quite simple (for an experienced developper) but haven't been posted yet.
Here is the problem :
I want to merge 2 shaders to get a "Diffuse+normal+cubemap+lighmap" shader.
So, on one side I have a "Diffuse + NormalMap + LightMap" shaders which looks like this (it's the legacy/lighmap bumpedspec a with a little tweaking to get the specular shinyness):
Shader "Legacy Shaders/Lightmapped/Custom/BumpedSpec" {
Properties {
_Color ("Main Color", Color) = (1,1,1,1)
_SpecColor ("Specular Color", Color) = (0.5, 0.5, 0.5, 1)
_Shininess ("Shininess", Range (0.03, 1)) = 0.078125
_MainTex ("Base (RGB)", 2D) = "white" {}
_BumpMap ("Normalmap", 2D) = "bump" {}
_LightMap ("Lightmap (RGB)", 2D) = "black" {}
}
SubShader {
LOD 200
Tags { "RenderType" = "Opaque" }
CGPROGRAM
#pragma surface surf BlinnPhong
struct Input {
float2 uv_MainTex;
float2 uv_BumpMap;
float2 uv2_LightMap;
};
sampler2D _MainTex;
sampler2D _LightMap;
sampler2D _BumpMap;
float4 _Color;
float _Shininess;
void surf (Input IN, inout SurfaceOutput o)
{
half4 tex = tex2D (_MainTex, IN.uv_MainTex);
o.Albedo = tex.rgb * _Color;
half4 lm = tex2D (_LightMap, IN.uv2_LightMap);
o.Emission = lm.rgb*o.Albedo.rgb;
o.Gloss = tex.a;
o.Alpha = lm.a * _Color.a;
o.Specular = _Shininess;
o.Normal = UnpackNormal(tex2D(_BumpMap, IN.uv_BumpMap));
}
ENDCG
}
FallBack "Legacy Shaders/Lightmapped/VertexLit"
}
And on the other side, i've got a shader with "Diffuse+cubemap+Lightmap" which looks like this :
Shader "Custom/CubeLightmap" {
Properties {
_Color ("Main Color", Color) = (1,1,1,1)
_ReflectColor ("Reflection Color", Color) = (1,1,1,0.5)
_MainTex ("Base (RGB) RefStrength (A)", 2D) = "white" {}
_Cube ("Reflection Cubemap", Cube) = "_Skybox" { TexGen CubeReflect }
_LightMap ("Lightmap (RGB)", 2D) = "lightmap" { LightmapMode }
}
SubShader {
LOD 200
Tags { "RenderType"="Opaque" }
CGPROGRAM
#pragma surface surf Lambert
sampler2D _MainTex;
samplerCUBE _Cube;
sampler2D _LightMap;
fixed4 _Color;
fixed4 _ReflectColor;
struct Input {
float2 uv_MainTex;
float3 worldRefl;
float2 uv2_LightMap;
};
void surf (Input IN, inout SurfaceOutput o) {
fixed4 tex = tex2D(_MainTex, IN.uv_MainTex);
fixed4 c = tex * _Color;
o.Albedo = c.rgb;
half4 lm = tex2D(_LightMap,IN.uv2_LightMap);
fixed4 reflcol = texCUBE (_Cube, IN.worldRefl);
reflcol *= tex.a;
o.Emission = lm.rgb * reflcol.rgb * _ReflectColor.rgb;
o.Alpha = reflcol.a * _ReflectColor.a * lm.a;
}
ENDCG
}
FallBack "Reflective/VertexLit"
}
So I want to merge both off them (a.k.a include cubemap in the first one or include normalmap in the second one) and I can't figure it out for the moment.
So I'm in need of some advice or help to achieve it.
Thanks in advance,
Regards
Sounds to me like you are looking to create a radiosity normal mapping shader. This will require you to at least know the basics of C++, and HSLS
The biggest problem you will encounter will be how to compute radiosity normal maps, as this requires a specially crafted light mapper. The only software I know of that does this is Beast by AutoDesk
After that you will need some simple shaders. There is some well documented explanations, and relevant code located at
Half Life 2 shading
Valve Software

Blank Screen after compiling openGL

I am really new to OpenGL and I am trying to just make a surface from two triangles. I don't know where I am going wrong with this code. I know that all the positions and colors are getting into the triangles class and that the Triangles are being made, but it's not getting outputted. Can someone help?
I tried to get just the output from the Triangle class but it doesn't seem to be working. I don't think there's anything wrong with the way I am calling the Display function.
Code:
#include<GL/gl.h>
#include<GL/glu.h>
#include<GL/glut.h>
#include<iostream>
#include<vector>
using namespace std;
class Triangle
{
public:
float position[9],color[3];
Triangle()
{}
Triangle(float position_t[], float color_t[])
{
for(int i=0;i<9;i++)
{position[i] = position_t[i];}
for(int i=0;i<3;i++)
{color[i]= color_t[i];}
}
void makeTriangle()
{
glBegin(GL_TRIANGLES);
glColor3f(color[0],color[1],color[2]);glVertex3f(position[0],position[1],position[2]);
glColor3f(color[0],color[1],color[2]);glVertex3f(position[3],position[4],position[5]);
glColor3f(color[0],color[1],color[2]);glVertex3f(position[6],position[7],position[8]);
glEnd();}
};
class Mesh
{
public:
/*float center[3],position[9],color[3];
float size;*/
vector<Triangle> elements;
float center[3],position[9],color[3];
float size;
Mesh(){}
Mesh(float center_in[3], float color_in[3])
{
for (int i=0;i<3;i++)
{
color[i] = color_in[i];
center[i] = center_in[i];
}
}
void getPositions()
{
position[0] = 1;position[1] = 1; position[2] = 1;
position[3] = -1;position[4] = -1; position[5] = 1;
position[6] = 1;position[7] = -1; position[8] = 1;
}
void getColor()
{
color[0] = 1; color[1]=0; color[2]=0;
}
static Mesh makeMesh()
{
Mesh a;
a.elements.resize(2);
a.getPositions();
a.getColor();
Triangle T(a.position,a.color);
a.elements[0] = T;
//Triangle O(2);
//a.elements[1] = 0;
return a;
}
};
void render()
{
glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT);
Mesh a;
a.elements.resize(2);
a.getPositions();
a.getColor();
Triangle T(a.position,a.color);
//vector<Mesh> m;
//m.push_back(Mesh::makeMesh());
glPushMatrix();
T.makeTriangle();
glPopMatrix();
glFlush();
glutSwapBuffers();
glutPostRedisplay();
}
Full Code: http://pastebin.com/xa3B7166
As I suggested you in the comments, you are not setting the gluLookat() function. Everything is being drawn but you are just not looking at it!
Docs: https://www.opengl.org/sdk/docs/man2/xhtml/gluLookAt.xml
Your code does not specify any transformations. Therefore, your coordinates need to be within the default view volume, which is [-1, 1] in all coordinate directions.
Or more technically, the model/view/projection transformations (or all the transformations applied in your vertex shader if you use the programmable pipeline) transform the coordinates into the clip coordinate space, and after perspective division into the normalized device coordinate (aka NDC) space. The range of the NDC space is [-1, 1] for all coordinates.
If you don't apply any transformations, like is the case in your code, your original coordinates already have to be in NDC space.
With your current coordinates:
position[0] = 1;position[1] = 1; position[2] = 1;
position[3] = -1;position[4] = -1; position[5] = 1;
position[6] = 1;position[7] = -1; position[8] = 1;
all the z-coordinates have values of 1, which means that the whole triangle is right on the boundary of the clip volume. To make it visible, you can simply set the z-coordinates to 0:
position[0] = 1;position[1] = 1; position[2] = 0;
position[3] = -1;position[4] = -1; position[5] = 0;
position[6] = 1;position[7] = -1; position[8] = 0;
This centers it within the NDC space in z-direction, with the vertices being on 3 of the corners in the xy-plane. You will therefore see half of your window covered by the triangle, cutting it in half along the diagonal.
It's of course common in OpenGL to have the original coordinates in a different coordinate space, and then apply transformations to place them within the view volume.
You're probably already aware of this, but I thought I'd mention it anyway: If you're just starting to learn OpenGL, I would suggest that you learn what people often call "modern OpenGL". This includes the OpenGL Core Profile, or OpenGL ES 2.0 or later. The calls you are using now are mostly deprecated in newer versions of OpenGL, and not available anymore in the Core Profile and ES. The initial hurdle is somewhat higher for "modern OpenGL", particularly since you have to write your own shaders, but you will get on the path to acquiring knowledge that is still current.

Precise Texture Overlay

I'm trying to set up a two-stage render of objects in a 3D engine I'm working on written in C++ with DirectX9 to facilitate transparency (and other things). I thought it was all working nicely until I noticed some dodgyness on the edge of objects rendered before objects using this two stage method.
The two stage method is simple:
Draw model to off-screen ("side") texture of same size using same zbuffer (no MSAA is used anywhere)
Draw off-screen ("side") texture over the top of the main render target with a suitable blend and no alpha test or write
In the image below the left view is with the two stage render of the gray object (a lamppost) with the body in-front of it rendered directly to the target texture. The right view is with the two-stage render disabled, so both are rendered directly onto the target surface.
On close inspection it is as if the side texture is offset by exactly 1 pixel "down" and 1 pixel "right" when rendered over the target surface (but is rendered correctly in-place). This can be seen in an overlay of the off screen texture (which I get my program to write out to a bitmap file via D3DXSaveTextureToFile) over a screen shot below.
One last image so you can see where the edge in the side texture is coming from (it's because rendering to the side texture does use z test). Left is screen short, right is side texture (as overlaid above).
All this leads me to believe that my "overlaying" isn't very effective. The code that renders the side texture over the main render target is shown below (note that the same viewport is used for all scene rendering (on and off screen)). The "effect" object is an instance of a thin wrapper over LPD3DXEFFECT, with the "effect" field (sorry about shoddy naming) being a LPD3DXEFFECT itself.
void drawSideOver(LPDIRECT3DDEVICE9 dxDevice, drawData* ddat)
{ // "ddat" drawdata contains lots of render state information, but all we need here is the handles for the targetSurface and sideSurface
D3DXMATRIX idMat;
D3DXMatrixIdentity(&idMat); // create identity matrix
dxDevice->SetRenderTarget(0, ddat->targetSurface); // switch to targetSurface
dxDevice->SetRenderState(D3DRS_ZENABLE, false); // disable z test and z write
dxDevice->SetRenderState(D3DRS_ZWRITEENABLE, false);
vertexOver overVerts[4]; // create square
overVerts[0] = vertexOver(-1, -1, 0, 0, 1);
overVerts[1] = vertexOver(-1, 1, 0, 0, 0);
overVerts[2] = vertexOver(1, -1, 0, 1, 1);
overVerts[3] = vertexOver(1, 1, 0, 1, 0);
effect.setTexture(ddat->sideTex); // use side texture as shader texture ("tex")
effect.effect->SetTechnique("over"); // change to "over" technique
effect.setViewProj(&idMat); // set viewProj to identity matrix so 1/-1 map directly
effect.effect->CommitChanges();
setAlpha(dxDevice); // this sets up the alpha blending which works fine
UINT numPasses, pass;
effect.effect->Begin(&numPasses, 0);
effect.effect->BeginPass(0);
dxDevice->SetVertexDeclaration(vertexDecOver);
dxDevice->DrawPrimitiveUP(D3DPT_TRIANGLESTRIP, 2, overVerts, sizeof(vertexOver));
effect.effect->EndPass();
effect.effect->End();
dxDevice->SetRenderState(D3DRS_ZENABLE, true); // revert these so we don't mess everything up drawn after this
dxDevice->SetRenderState(D3DRS_ZWRITEENABLE, true);
}
The C++ side definition for the VertexOver struct and constructor (HLSL side shown below somewhere):
struct vertexOver
{
public:
float x;
float y;
float z;
float w;
float tu;
float tv;
vertexOver() { }
vertexOver(float xN, float yN, float zN, float tuN, float tvN)
{
x = xN;
y = yN;
z = zN;
w = 1.0;
tu = tuN;
tv = tvN;
}
};
Inefficiency in re-creating and passing the vertices down to the GPU each draw aside, what I really want to know is why this method doesn't quite work, and if there are any better methods for overlaying textures like this with an alpha blend that won't exhibit this issue
I figured that the texture sampling may matter somewhat in this matter, but messing about with options didn't seem to help much (for example, using a LINEAR filter just makes it fuzzy as you might expect implying that the offset isn't as clear-cut as a 1 pixel discrepancy). Shader code:
struct VS_Input_Over
{
float4 pos : POSITION0;
float2 txc : TEXCOORD0;
};
struct VS_Output_Over
{
float4 pos : POSITION0;
float2 txc : TEXCOORD0;
float4 altPos : TEXCOORD1;
};
struct PS_Output
{
float4 col : COLOR0;
};
Texture tex;
sampler texSampler = sampler_state { texture = <tex>;magfilter = NONE; minfilter = NONE; mipfilter = NONE; AddressU = mirror; AddressV = mirror;};
// side/over shaders (these make up the "over" technique (pixel shader version 2.0)
VS_Output_Over VShade_Over(VS_Input_Over inp)
{
VS_Output_Over outp = (VS_Output_Over)0;
outp.pos = mul(inp.pos, viewProj);
outp.altPos = outp.pos;
outp.txc = inp.txc;
return outp;
}
PS_Output PShade_Over(VS_Output_Over inp)
{
PS_Output outp = (PS_Output)0;
outp.col = tex2D(texSampler, inp.txc);
return outp;
}
I've looked about for a "Blended Blit" or something but I can't find anything, and other related searches have only brought up forums implying that rendering a quad with an orthographic projection is the way to go about doing this.
Sorry if I've given far too much detail for this issue but it's both interesting and infuriating and any feedback would be greatly appreciated.
It looks for me that you problem is the mapping of texels to pixels. You must offset a screen-aligned quad with a half pixel to match the texels direct to the screenpixels. This issue is explaines here: Directly Mapping Texels to Pixels (MSDN)
For anyone else hitting a similar wall, my specific problem solved by adjusting the U and V values of the verticies sent to the GPU for the overlaid texture triangles thus:
for (int i = 0; i < 4; i++)
{
overVerts[i].tu += 0.5 / (float)ddat->targetVp->Width; // ddat->targetVp is the viewport in use, and the viewport is the same size as the texture
overVerts[i].tv += 0.5 / (float)ddat->targetVp->Height;
}
See Directly Mapping Texels to Pixels as provided by Gnietschow's answer for an explanation as to why this makes sense.