HLSL error X3082

I have the following pixel shader function in HLSL:
float GammaCorrectA(float3 color, float alpha) {
    float a = 1 - sqrt(1 - alpha);
    float b = sqrt(alpha);
    float t = (color.x + color.y + color.z) / 3;
    return a * (1 - t) + b^t;
}
It is being called like this:
float screen = GammaCorrectA(strokeColor, alpha);
strokeColor is a float3; alpha is a float.
This is resulting in the following error on the 'return' line:
error X3082: int or unsigned int type required.
I am compiling with Shader Model 4 Level 9_3, with optimizations disabled and debugging information enabled.
I can't seem to find this error documented on MSDN, or anywhere.
Broadly, can anybody point me to where this error is documented? I would like to understand what's going on here. More specifically, can anybody spot my error?

In HLSL, ^ is the bitwise XOR operator, which only accepts int or uint operands; that is what error X3082 is complaining about. Instead of b^t, use the HLSL pow function:
return a * (1 - t) + pow(b, t);
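For reference, the complete function with the fix applied:
float GammaCorrectA(float3 color, float alpha) {
    float a = 1 - sqrt(1 - alpha);
    float b = sqrt(alpha);
    float t = (color.x + color.y + color.z) / 3;
    return a * (1 - t) + pow(b, t);
}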


Differences between NVCC and NVRTC on compilation to PTX

Summary
I'm porting a simple raytracing application based on the Scratchapixel version to a bunch of GPU libraries. I successfully ported it to CUDA using both the runtime API and the driver API, but it throws a Segmentation fault (core dumped) when I try to use the PTX compiled at runtime with NVRTC.
If I uncomment the #include <math.h> directive at the beginning of the kernel file (see below), it still works using NVCC (the generated PTX is exactly the same) but fails at compilation using NVRTC.
I want to know how I can make NVRTC behave just like NVCC (is it even possible?), or at least understand the reason behind these issues.
Detailed description
File kernel.cu (Kernel source):
//#include <math.h>
#define MAX_RAY_DEPTH 5
template<typename T>
class Vec3
{
public:
    T x, y, z;
    __device__ Vec3() : x(T(0)), y(T(0)), z(T(0)) {}
    __device__ Vec3(T xx) : x(xx), y(xx), z(xx) {}
    __device__ Vec3(T xx, T yy, T zz) : x(xx), y(yy), z(zz) {}
    __device__ Vec3& normalize()
    {
        T nor2 = length2();
        if (nor2 > 0) {
            T invNor = 1 / sqrt(nor2);
            x *= invNor, y *= invNor, z *= invNor;
        }
        return *this;
    }
    __device__ Vec3<T> operator * (const T &f) const { return Vec3<T>(x * f, y * f, z * f); }
    __device__ Vec3<T> operator * (const Vec3<T> &v) const { return Vec3<T>(x * v.x, y * v.y, z * v.z); }
    __device__ T dot(const Vec3<T> &v) const { return x * v.x + y * v.y + z * v.z; }
    __device__ Vec3<T> operator - (const Vec3<T> &v) const { return Vec3<T>(x - v.x, y - v.y, z - v.z); }
    __device__ Vec3<T> operator + (const Vec3<T> &v) const { return Vec3<T>(x + v.x, y + v.y, z + v.z); }
    __device__ Vec3<T>& operator += (const Vec3<T> &v) { x += v.x, y += v.y, z += v.z; return *this; }
    __device__ Vec3<T>& operator *= (const Vec3<T> &v) { x *= v.x, y *= v.y, z *= v.z; return *this; }
    __device__ Vec3<T> operator - () const { return Vec3<T>(-x, -y, -z); }
    __device__ T length2() const { return x * x + y * y + z * z; }
    __device__ T length() const { return sqrt(length2()); }
};
typedef Vec3<float> Vec3f;
typedef Vec3<bool> Vec3b;
class Sphere
{
public:
    const char* id;
    Vec3f center;                        /// position of the sphere
    float radius, radius2;               /// sphere radius and radius^2
    Vec3f surfaceColor, emissionColor;   /// surface color and emission (light)
    float transparency, reflection;      /// surface transparency and reflectivity
    int animation_frame;
    Vec3b animation_position_rand;
    Vec3f animation_position;
    Sphere(
        const char* id,
        const Vec3f &c,
        const float &r,
        const Vec3f &sc,
        const float &refl = 0,
        const float &transp = 0,
        const Vec3f &ec = 0) :
        id(id), center(c), radius(r), radius2(r * r), surfaceColor(sc),
        emissionColor(ec), transparency(transp), reflection(refl)
    {
        animation_frame = 0;
    }
    //[comment]
    // Compute a ray-sphere intersection using the geometric solution
    //[/comment]
    __device__ bool intersect(const Vec3f &rayorig, const Vec3f &raydir, float &t0, float &t1) const
    {
        Vec3f l = center - rayorig;
        float tca = l.dot(raydir);
        if (tca < 0) return false;
        float d2 = l.dot(l) - tca * tca;
        if (d2 > radius2) return false;
        float thc = sqrt(radius2 - d2);
        t0 = tca - thc;
        t1 = tca + thc;
        return true;
    }
};
__device__ float mix(const float &a, const float &b, const float &mixval)
{
    return b * mixval + a * (1 - mixval);
}
__device__ Vec3f trace(
    const Vec3f &rayorig,
    const Vec3f &raydir,
    const Sphere *spheres,
    const unsigned int spheres_size,
    const int &depth)
{
    float tnear = INFINITY;
    const Sphere* sphere = NULL;
    // find intersection of this ray with the sphere in the scene
    for (unsigned i = 0; i < spheres_size; ++i) {
        float t0 = INFINITY, t1 = INFINITY;
        if (spheres[i].intersect(rayorig, raydir, t0, t1)) {
            if (t0 < 0) t0 = t1;
            if (t0 < tnear) {
                tnear = t0;
                sphere = &spheres[i];
            }
        }
    }
    // if there's no intersection return black or background color
    if (!sphere) return Vec3f(2);
    Vec3f surfaceColor = 0; // color of the ray/surface of the object intersected by the ray
    Vec3f phit = rayorig + raydir * tnear; // point of intersection
    Vec3f nhit = phit - sphere->center; // normal at the intersection point
    nhit.normalize(); // normalize normal direction
    // If the normal and the view direction are not opposite to each other
    // reverse the normal direction. That also means we are inside the sphere so set
    // the inside bool to true. Finally reverse the sign of IdotN which we want
    // positive.
    float bias = 1e-4; // add some bias to the point from which we will be tracing
    bool inside = false;
    if (raydir.dot(nhit) > 0) nhit = -nhit, inside = true;
    if ((sphere->transparency > 0 || sphere->reflection > 0) && depth < MAX_RAY_DEPTH) {
        float facingratio = -raydir.dot(nhit);
        // change the mix value to tweak the effect
        float fresneleffect = mix(pow(1 - facingratio, 3), 1, 0.1);
        // compute reflection direction (no need to normalize because all vectors
        // are already normalized)
        Vec3f refldir = raydir - nhit * 2 * raydir.dot(nhit);
        refldir.normalize();
        Vec3f reflection = trace(phit + nhit * bias, refldir, spheres, spheres_size, depth + 1);
        Vec3f refraction = 0;
        // if the sphere is also transparent compute refraction ray (transmission)
        if (sphere->transparency) {
            float ior = 1.1, eta = (inside) ? ior : 1 / ior; // are we inside or outside the surface?
            float cosi = -nhit.dot(raydir);
            float k = 1 - eta * eta * (1 - cosi * cosi);
            Vec3f refrdir = raydir * eta + nhit * (eta * cosi - sqrt(k));
            refrdir.normalize();
            refraction = trace(phit - nhit * bias, refrdir, spheres, spheres_size, depth + 1);
        }
        // the result is a mix of reflection and refraction (if the sphere is transparent)
        surfaceColor = (
            reflection * fresneleffect +
            refraction * (1 - fresneleffect) * sphere->transparency) * sphere->surfaceColor;
    }
    else {
        // it's a diffuse object, no need to raytrace any further
        for (unsigned i = 0; i < spheres_size; ++i) {
            if (spheres[i].emissionColor.x > 0) {
                // this is a light
                Vec3f transmission = 1;
                Vec3f lightDirection = spheres[i].center - phit;
                lightDirection.normalize();
                for (unsigned j = 0; j < spheres_size; ++j) {
                    if (i != j) {
                        float t0, t1;
                        if (spheres[j].intersect(phit + nhit * bias, lightDirection, t0, t1)) {
                            transmission = 0;
                            break;
                        }
                    }
                }
                surfaceColor += sphere->surfaceColor * transmission *
                    max(float(0), nhit.dot(lightDirection)) * spheres[i].emissionColor;
            }
        }
    }
    return surfaceColor + sphere->emissionColor;
}
extern "C" __global__
void raytrace_kernel(unsigned int width, unsigned int height, Vec3f *image, Sphere *spheres, unsigned int spheres_size, float invWidth, float invHeight, float aspectratio, float angle) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < height && x < width) {
float xx = (2 * ((x + 0.5) * invWidth) - 1) * angle * aspectratio;
float yy = (1 - 2 * ((y + 0.5) * invHeight)) * angle;
Vec3f raydir(xx, yy, -1);
raydir.normalize();
image[y*width+x] = trace(Vec3f(0), raydir, spheres, spheres_size, 0);
}
}
I can successfully compile it with: nvcc --ptx kernel.cu -o kernel.ptx (full PTX here) and use that PTX in the driver API with cuModuleLoadDataEx using the following snippet. It works as expected.
It works fine even if I uncomment the #include <math.h> line (actually, the PTX generated is exactly the same).
CudaSafeCall( cuInit(0) );
CUdevice device;
CudaSafeCall( cuDeviceGet(&device, 0) );
CUcontext context;
CudaSafeCall( cuCtxCreate(&context, 0, device) );
unsigned int error_buffer_size = 1024;
std::vector<CUjit_option> options;
std::vector<void*> values;
char* error_log = new char[error_buffer_size];
options.push_back(CU_JIT_ERROR_LOG_BUFFER); //Pointer to a buffer in which to print any log messages that reflect errors
values.push_back(error_log);
options.push_back(CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES); //Log buffer size in bytes. Log messages will be capped at this size (including null terminator)
values.push_back(&error_buffer_size);
options.push_back(CU_JIT_TARGET_FROM_CUCONTEXT); //Determines the target based on the current attached context (default)
values.push_back(0); //No option value required for CU_JIT_TARGET_FROM_CUCONTEXT
CUmodule module;
CUresult status = cuModuleLoadDataEx(&module, ptxSource, options.size(), options.data(), values.data());
if (error_log && error_log[0]) { // https://stackoverflow.com/a/7970669/3136474
    std::cout << "Compiler error: " << error_log << std::endl;
}
CudaSafeCall( status );
However, whenever I try to compile this exact kernel using NVRTC (full PTX here), it compiles successfully but gives me a Segmentation fault (core dumped) on the call to cuModuleLoadDataEx (when trying to use the resulting PTX).
If I uncomment the #include <math.h> line, it fails at the nvrtcCompileProgram call with the following output:
nvrtcSafeBuild() failed at cuda_raytracer_nvrtc_api.cpp:221 : NVRTC_ERROR_COMPILATION
Build log:
/usr/include/bits/mathcalls.h(177): error: linkage specification is incompatible with previous "isinf"
__nv_nvrtc_builtin_header.h(126689): here
/usr/include/bits/mathcalls.h(211): error: linkage specification is incompatible with previous "isnan"
__nv_nvrtc_builtin_header.h(126686): here
2 errors detected in the compilation of "kernel.cu".
The code I'm using to compile it with NVRTC is:
nvrtcProgram prog;
NvrtcSafeCall( nvrtcCreateProgram(&prog, kernelSource, "kernel.cu", 0, NULL, NULL) );
// https://docs.nvidia.com/cuda/nvrtc/index.html#group__options
std::vector<const char*> compilationOpts;
compilationOpts.push_back("--device-as-default-execution-space");
// NvrtcSafeBuild is a macro which automatically prints nvrtcGetProgramLog if the compilation fails
NvrtcSafeBuild( nvrtcCompileProgram(prog, compilationOpts.size(), compilationOpts.data()), prog );
size_t ptxSize;
NvrtcSafeCall( nvrtcGetPTXSize(prog, &ptxSize) );
char* ptxSource = new char[ptxSize];
NvrtcSafeCall( nvrtcGetPTX(prog, ptxSource) );
NvrtcSafeCall( nvrtcDestroyProgram(&prog) );
Then I simply load the ptxSource using the previous snippet (note: that code block is the same used for both the driver API version and the NVRTC version).
Additional things that I've noticed/tried so far
The PTX generated by NVCC and the one generated by NVRTC are quite different, but I'm unable to understand them well enough to identify possible problems.
Tried specifying the exact GPU architecture (in my case, CC 6.1) to the compiler; no difference.
Tried disabling all compiler optimizations (options --ftz=false --prec-sqrt=true --prec-div=true --fmad=false in nvrtcCompileProgram). The PTX file got bigger, but it still segfaults.
Tried adding --std=c++11 or --std=c++14 to the NVRTC compiler options. With either of them NVRTC generates an almost empty (4-line) PTX but issues no warning or error until I try to use it.
Environment
OS: Ubuntu 18.04.4 LTS 64-bit
nvcc --version: Cuda compilation tools, release 10.1, V10.1.168. Built on Wed_Apr_24_19:10:27_PDT_2019
gcc --version: gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
Hardware: Intel I7-7700HQ, GeForce GTX 1050 Ti
Edit (one day after the original post)
I forgot to add my environment. See previous section.
"Also can you compile the nvrtc output with ptxas?" – talonmies' comment
The nvcc-generated PTX compiles with a warning:
$ ptxas -o /tmp/temp_ptxas_output.o kernel.ptx
ptxas warning : Stack size for entry function 'raytrace_kernel' cannot be statically determined
This is due to the recursive kernel function (more on that) and can be safely ignored.
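If you want the JIT to stop guessing, you can also reserve a fixed per-thread stack for the bounded recursion (MAX_RAY_DEPTH is 5 here). A sketch using the driver API; the 4 KiB figure is an assumption, not a measured requirement:
// After cuCtxCreate: give each thread a fixed stack so the recursive
// trace() calls have guaranteed room (size chosen arbitrarily here).
CudaSafeCall( cuCtxSetLimit(CU_LIMIT_STACK_SIZE, 4096) );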
The nvrtc-generated PTX does not compile and issues the error:
$ ptxas -o /tmp/temp_ptxas_output.o nvrtc_kernel.ptx
ptxas fatal : Unresolved extern function '_Z5powiffi'
Based on this question I added __device__ to the Sphere class constructor and removed the --device-as-default-execution-space compiler option.
It generates a slightly different PTX now, but still presents the same error.
Compiling with #include <math.h> uncommented now generates a lot of "A function without execution space annotations is considered a host function, and host functions are not allowed in JIT mode." warnings besides the previous errors.
If I try to use the accepted solution from that question, it throws a bunch of syntax errors and does not compile. NVCC still works flawlessly.
Just found the culprit by the ancient comment-and-test method: the error goes away if I remove the pow call used to calculate the fresnel effect inside the trace method.
For now, I've just replaced pow(var, 3) with var*var*var.
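In the context of the trace function above, the workaround looks like this (fr is just a scratch variable):
// before: float fresneleffect = mix(pow(1 - facingratio, 3), 1, 0.1);
float fr = 1 - facingratio;
float fresneleffect = mix(fr * fr * fr, 1, 0.1);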
I created an MVCE and filed a bug report with NVIDIA: https://developer.nvidia.com/nvidia_bug/2917596.
Liam Zhang answered it and pointed me to the problem:
The issue in your code is that there is an incorrect option value being passed to cuModuleLoadDataEx. In lines:
options.push_back(CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES); //Log buffer size in bytes. Log messages will be capped at this size (including null terminator)
values.push_back(&error_buffer_size);
the buffer size option is provided, but instead of passing a value with the size, a pointer to that value is passed. Since this pointer is then read as a number, the driver assumed a much larger buffer size than 1024.
During the NVRTC compilation an "Unresolved extern function" error occurred, because the pow function signature, as you can find in the documentation, is:
__device__​ double pow ( double x, double y )
The segfault happened when the driver tried to zero that buffer before putting the error message in it.
Without the call to pow, there was no compilation error, so the error buffer was not used and there was no segfault.
To ensure the device code is correct, the values used to call the pow function, as well as the variable receiving its output, should be doubles; alternatively, the float equivalent function, powf, could be used.
If I change the call to values.push_back((void*)error_buffer_size); it reports the same error as the ptxas compilation of the generated PTX:
Compiler error: ptxas fatal : Unresolved extern function '_Z5powiffi'
cudaSafeCall() failed at file.cpp:74 : CUDA_ERROR_INVALID_PTX - a PTX JIT compilation failed
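For completeness, both fixes side by side, as a sketch based on the snippets above: the option value has to be passed by value (cast into the void*), and the device code has to avoid the double-precision pow:
// Host side: pass the log buffer size itself, not a pointer to it
options.push_back(CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES);
values.push_back((void*)(uintptr_t)error_buffer_size);
// Device side: use the float overload so no unresolved double pow is emitted
float fresneleffect = mix(powf(1.0f - facingratio, 3.0f), 1.0f, 0.1f);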

Multiply two complex number on GPU using OpenCL

I'm trying to write OpenCL-based code to calculate exp() of some complex numbers on the GPU using the following kernel function:
#include <complex.h>

inline float complex exp(float complex z) {
    return (exp(__real__(z)) * (cos(__imag__(z)) + sin(__imag__(z)) * I));
}

__kernel void
calculate(__global float * c)
{
    int nIndex = get_global_id(0);
    float complex rays = 1.0f + 1.0f * I;
    float complex ans = exp(rays);
    c[nIndex] = __real__(ans * ans);
}
But I get the following error:
ASSERTION FAILED: I.hasStructRetAttr() == false
The * operator works well with other complex numbers, but it produces an error when multiplying the outputs of the exp() function. I can also use the + and - operators on exp()'s output without any problem. I only have problems with the * and / operators.
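The GNU complex extension used above is not part of standard OpenCL C, so a common workaround (a sketch, not from the original post) is to model a complex number as a float2 holding (real, imaginary) and write the operations out by hand:
__kernel void calculate(__global float *c)
{
    int nIndex = get_global_id(0);
    float2 rays = (float2)(1.0f, 1.0f);   // 1 + 1i stored as (re, im)
    // exp(a + bi) = e^a * (cos b + i sin b)
    float2 ans = exp(rays.x) * (float2)(cos(rays.y), sin(rays.y));
    // complex product: (a + bi)(c + di) = (ac - bd) + (ad + bc)i
    float2 sq = (float2)(ans.x * ans.x - ans.y * ans.y, 2.0f * ans.x * ans.y);
    c[nIndex] = sq.x;                     // real part of ans * ans
}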

DirectX HLSL shader implicit truncation of vector type error

Hi, I'm getting an error in one of my pixel shaders: implicit truncation of vector type.
Here is the code causing the error:
float3 colour = 0;
float3 ppColour = SceneTexture.Sample(PointSample, ppIn.UV);
float4 col = SceneTexture.Sample(PointSample, ppIn.UV);

float intensity = 0.0f;
float r = SceneTexture.Sample(PointSample, ppIn.UV).r;
float g = SceneTexture.Sample(PointSample, ppIn.UV).g;
float b = SceneTexture.Sample(PointSample, ppIn.UV).b;
float a = SceneTexture.Sample(PointSample, ppIn.UV).a;
intensity = r + g + b + a;

if (intensity > 5.0f)
{
    for (int count = 0; count < 13; count++)
    {
        colour += SceneTexture.Sample(TrilinearSampler, ppIn.UV + PixelKernel[count] * BlurStrength) * BlurWeights[count];
    }
    return float4(colour, 1.0f);
}
return float4(ppColour, 1.0f);
If I comment out intensity = r + g + b + a; then the project compiles. Can anyone see what I'm doing wrong? Thanks.
The reason you get this error is that you are multiplying/adding float3s and float4s. You should 'cast' a float3 to a float4 with float4(value, 1.0f), or swizzle a float4 down with .xyz (which makes it a float3).
Inferring some of the 'extra' stuff that is required to actually compile the shader (uniforms, inputs, and making the code into an actual function), I get the following output when trying to compile it:
test.ps(16,9): warning X3206: implicit truncation of vector type
test.ps(29,11): warning X3206: implicit truncation of vector type
test.ps(29,11): error X4014: cannot have gradient operations inside loops with divergent flow control
As you can see, the truncation messages are just warnings, not errors, unless you are compiling with /WX. The issue behind these warnings is that you are assigning the result of a texture sample to a float3, while the return value is actually a float4. You can either select the appropriate components with a swizzle, or change the variable type. For example:
float4 ppColour = SceneTexture.Sample(PointSample, ppIn.UV);
The reason for the actual error, is that you cannot do sample interpolation in dynamic loops, or loops inside conditional statements. In this case, your loop is inside the if (intensity > 5.0) conditional. You have two options, either you can remove the conditional, or, you can use SampleLevel instead of Sample:
colour += SceneTexture.SampleLevel(TrilinearSampler, ppIn.UV + PixelKernel[count] * BlurStrength, 0) * BlurWeights[count];
Note: using SampleLevel like this will always sample the top mip-level.
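Putting the two answers together, the corrected fragment might read as follows (a sketch based on the snippets above):
float3 colour = 0;
float4 ppColour = SceneTexture.Sample(PointSample, ppIn.UV);
float intensity = ppColour.r + ppColour.g + ppColour.b + ppColour.a;
if (intensity > 5.0f)
{
    for (int count = 0; count < 13; count++)
    {
        // SampleLevel avoids gradient operations inside the divergent branch
        colour += SceneTexture.SampleLevel(TrilinearSampler, ppIn.UV + PixelKernel[count] * BlurStrength, 0).rgb * BlurWeights[count];
    }
    return float4(colour, 1.0f);
}
return ppColour;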

Ray tracing - refraction bug

I am writing a ray tracer. So far I have diffuse, Blinn lighting and reflections. Something has gone wrong with my refractions and I have no idea what. I'm hoping someone can help me out.
I have a big red diffuse + Blinn sphere and a small refractive one with refraction index n = 1.5.
The small one is just really screwed up.
Relevant code:
ReflectiveSurface::ReflectiveSurface(const Color& _n, const Color& _k) :
    F0(Color(((_n - 1)*(_n - 1) + _k * _k) / ((_n + 1)*(_n + 1) + _k * _k))) {}

Color ReflectiveSurface::F(const Point& N, const Point& V) const {
    float cosa = fabs(N * V);
    return F0 + (F0 * (-1) + 1) * pow(1 - cosa, 5);
}

Color ReflectiveSurface::getColor(const Incidence& incidence, const Scene& scene, int traceDepth) const {
    Point reflectedDir = reflect(incidence.normal, incidence.direction);
    Ray ray = Ray(incidence.point + reflectedDir * epsilon, reflectedDir);
    return F(incidence.normal, incidence.direction) * scene.rayTrace(ray, traceDepth + 1);
}

Point ReflectiveSurface::reflect(const Point& N, const Point& V) const {
    return V - N * (2 * (N * V));
}

bool RefractiveSurface::refractionDir(Point& T, Point& N, const Point& V) const {
    float cosa = -(N * V), cn = n;
    if (cosa < 0) { cosa = -cosa; N = N * (-1); cn = 1 / n; }
    float disc = 1 - (1 - cosa * cosa) / cn / cn;
    if (disc < 0) return false;
    T = V / cn + N * (cosa / cn - sqrt(disc));
    return true;
}

RefractiveSurface::RefractiveSurface(float _n, const Color& _k) : ReflectiveSurface(Color(1, 1, 1) * _n, _k) {}

Surface* RefractiveSurface::copy() { return new RefractiveSurface(*this); }

Color RefractiveSurface::getColor(const Incidence& incidence, const Scene& scene, int traceDepth) const {
    Incidence I = Incidence(incidence);
    Color reflectedColor, refractedColor;
    Point direction = reflect(I.normal, I.direction);
    Ray reflectedRay = Ray(I.point + direction * epsilon, direction);
    if (refractionDir(direction, I.normal, I.direction)) {
        Ray refractedRay = Ray(I.point + direction * epsilon, direction);
        Color colorF = F(I.normal, I.direction);
        reflectedColor = colorF * scene.rayTrace(reflectedRay, traceDepth + 1);
        refractedColor = (Color(1, 1, 1) - colorF) * scene.rayTrace(refractedRay, traceDepth + 1);
    }
    else {
        reflectedColor = scene.rayTrace(reflectedRay, traceDepth + 1);
    }
    return reflectedColor + refractedColor;
}
The code is all over the place, since this is homework and I'm not allowed to include additional headers and have to send it in as one cpp file, so I had to separate every class into forward declaration, declaration and implementation in that one file. It makes me vomit but I tried to keep it as clean as possible. There is tons of code, so I only included what I thought was most relevant. ReflectiveSurface is RefractiveSurface's parent class. N is the surface normal, V is the ray direction vector, n is the refraction index. The Incidence structure holds a point, a normal and a direction vector.
Formulas for the Fresnel approximation and the refraction vector, respectively:
F = F0 + (1 - F0) * (1 - cos a)^5
T = V/n + N * (cos a / n - sqrt(1 - (1 - cos^2 a) / n^2))
You can see in the code that I use an epsilon * ray direction value to avoid shadow acne caused by float imprecision. Something similar seems to be happening to the small sphere, though.
Another screenshot:
As you can see, the sphere doesn't appear transparent, but it does inherit the diffuse sphere's color. It also usually has some white pixels.
Without refraction:
RefractiveSurface::refractionDir takes the normal N by (non-const) reference, and it may invert it. This seems dangerous. It's not clear the caller wants I.normal to be flipped, as it's used in color calculations further down.
Also, refractedColor is not always initialized (unless the Color default constructor makes it black).
Try (temporarily) simplifying and just see if the refracted rays hit where you expect. Remove the Fresnel computation and the reflection component and just set refractedColor to the result of tracing the refracted ray. That will help determine whether the bug is in the Fresnel calculation or in the geometry of bending the ray.
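A minimal debugging version of RefractiveSurface::getColor along those lines (a sketch using the classes from the post; the magenta return value is just a marker for total internal reflection):
Color RefractiveSurface::getColor(const Incidence& incidence, const Scene& scene, int traceDepth) const {
    Incidence I = Incidence(incidence);
    // Start from the reflection direction only so 'direction' is initialized;
    // refractionDir overwrites it when refraction is possible.
    Point direction = reflect(I.normal, I.direction);
    if (refractionDir(direction, I.normal, I.direction)) {
        Ray refractedRay = Ray(I.point + direction * epsilon, direction);
        // Trace only the refracted ray; if the bend is wrong it shows up immediately.
        return scene.rayTrace(refractedRay, traceDepth + 1);
    }
    return Color(1, 0, 1); // total internal reflection: mark in magenta
}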
A debugging tip: Color the pixels that don't hit anything with something other than black. That makes it easy to distinguish the misses from the shadows (surface acne).
The answer turned out to be pretty simple, but it took me about 3 days of staring at the code to catch the bug. I have a Surface class and derive two classes from it: RoughSurface (diffuse + Blinn) and ReflectiveSurface. Then RefractiveSurface is derived from ReflectiveSurface. ReflectiveSurface's constructor takes the refractive index (n) and the extinction value (k) as parameters, but doesn't store them. F0 is computed from them during construction, and then they are lost. RefractiveSurface, on the other hand, uses n in the refraction angle calculation.
Old constructor:
RefractiveSurface::RefractiveSurface(float _n, const Color& _k) :
    ReflectiveSurface(Color(1, 1, 1) * _n, _k) {}
New constructor:
RefractiveSurface::RefractiveSurface(float _n, const Color& _k) :
    ReflectiveSurface(Color(1, 1, 1) * _n, _k), n(_n) {}
As you can see, I forgot to save the n value for RefractiveSurface in the constructor.
Small red sphere behind big glass sphere lit from the two sides of the camera:
It looks awesome in motion! :D
Thank you for your time, guys. Gotta finish this homework, then I'll rewrite the whole thing and optimize the hell out of it.

error X8000 : D3D11 Internal Compiler error : Invalid Bytecode: Invalid operand type for operand #1 of opcode #86 (counts are 1-based)

I'm absolutely stumped as well as my instructors/lab-assistants.
For some reason, the following HLSL code is returning this in the output window:
error X8000 : D3D11 Internal Compiler error : Invalid Bytecode: Invalid operand type for operand #1 of opcode #86 (counts are 1-based).
Here's the function in the HLSL causing the issue:
// Projects a sphere diameter to screen space to calculate the desired tessellation factor
float SphereToScreenSpaceTessellation(float3 p0, float3 p1, float diameter)
{
    float3 centerPoint = (p0 + p1) * 0.5f;
    float4 point0 = mul( float4(centerPoint, 1.0f), gTileWorldView );
    float4 point1 = point0;
    point1.x += diameter;

    float4 point0ClipSpace = mul(point0, gTileProj);
    float4 point1ClipSpace = mul(point1, gTileProj);
    point0ClipSpace /= point0ClipSpace.w;
    point1ClipSpace /= point1ClipSpace.w;
    point0ClipSpace.xy *= gScreenSize;
    point1ClipSpace.xy *= gScreenSize;

    float projSizeOfEdge = distance(point0ClipSpace, point1ClipSpace);
    float result = projSizeOfEdge / gTessellatedTriWidth;
    return clamp(result, 0, 64);
}
I've narrowed it down to the point where I believe it may be the "mul" intrinsic. We've taken everything out of the code and tried to return a temporary variable like this, and it works fine:
float SphereToScreenSpaceTessellation(float3 p0, float3 p1, float diameter)
{
    float temp = 0;
    float3 centerPoint = (p0 + p1) * 0.5f;
    float4 point0 = mul( float4(centerPoint, 1.0f), gTileWorldView );
    float4 point1 = point0;
    point1.x += diameter;
    float4 point0ClipSpace = mul(point0, gTileProj);
    float4 point1ClipSpace = mul(point1, gTileProj);
    point0ClipSpace /= point0ClipSpace.w;
    point1ClipSpace /= point1ClipSpace.w;
    point0ClipSpace.xy *= gScreenSize;
    point1ClipSpace.xy *= gScreenSize;
    float projSizeOfEdge = distance(point0ClipSpace, point1ClipSpace);
    float result = projSizeOfEdge / gTessellatedTriWidth;
    return temp;
    //return clamp(result, 0, 64);
}
If anyone is wondering:
gTileWorldView, gTileProj are float4x4's in a .hlsli file
gScreenSize is a float2 in a .hlsli file.
gTessellatedTriWidth is a float in a .hlsli file.
The function follows one stated in a 2011 NVIDIA shader at http://dx11-xpr.googlecode.com/svn/trunk/XPR/Media/Effects/TerrainTessellation.fx
I tried to copy and paste their solution, replacing their variables with the ones above, and the same error happens.
I'm absolutely stumped and I need assistance in order to do this assignment. Please help.
Check out this line:
point0ClipSpace.xy *= gScreenSize;
Is gScreenSize a float2? I do not believe you can scalar multiply a vec by any vec type.
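If that is the culprit, writing the multiply out per component sidesteps the question entirely (a sketch, assuming gScreenSize is a float2):
point0ClipSpace.x *= gScreenSize.x;
point0ClipSpace.y *= gScreenSize.y;
point1ClipSpace.x *= gScreenSize.x;
point1ClipSpace.y *= gScreenSize.y;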