C++ SSE2 intrinsics - c++

I just learned that there's a way to achieve some parallelization using intrinsics. I found the following code and wanted to go through it but I could understand much. I was trying make the operations be in single precision but how can I do that?
#include <stdio.h>
#include <stdlib.h>
#include <xmmintrin.h>
inline double pi_4 (int n){
int i;
__m128d mypart2,x2, b, c, one;
double *x = (double *)malloc(n*sizeof(double));
double *mypart = (double *)malloc(n*sizeof(double));
double sum = 0.0;
double dx = 1.0/n;
double x1[2] __attribute__((aligned(16)));
one = _mm_set_pd1(1.0); // set one to (1,1)
for (i = 0; i < n; i++){
x[i] = dx/2 + dx*i;
}
for (i = 0; i < n; i+=2){
x1[0]=x[i]; x1[1]=x[i+1];
x2 = _mm_load_pd(x1);
b = _mm_mul_pd(x2,x2);
c = _mm_add_pd(b,one);
mypart2 = _mm_div_pd(one,c);
_mm_store_pd(&mypart[i], mypart2);
}
for (i = 0; i < n; i++)
sum += mypart[i];
return sum*dx;
}
int main(){
double res;
res=pi_4(128);
printf("pi = %lf\n", 4*res);
return 0;
}
I was thinking of changing everything from double to float and call the correct intrinsic functions, for instance, instead of _mm_set_pd1 -> _mm_set_ps1. I don't know if this will make the program from double to single precision.
UPDATE
I tried like follows but I'm getting a segmentation fault
#include <stdio.h>
#include <stdlib.h>
#include <xmmintrin.h>
inline float pi_4 (int n){
int i;
__m128 mypart2,x2, b, c, one;
float *x = (float *)malloc(n*sizeof(float));
float *mypart = (float*)malloc(n*sizeof(float));
float sum = 0.0;
float dx = 1.0/n;
float x1[2] __attribute__((aligned(16)));
one = _mm_set_ps1(1.0); // set one to (1,1)
for (i = 0; i < n; i++){
x[i] = dx/2 + dx*i;
}
for (i = 0; i < n; i+=2){
x1[0]=x[i]; x1[1]=x[i+1];
x2 = _mm_load_ps(x1);
b = _mm_mul_ps(x2,x2);
c = _mm_add_ps(b,one);
mypart2 = _mm_div_ps(one,c);
_mm_store_ps(&mypart[i], mypart2);
}
for (i = 0; i < n; i++)
sum += mypart[i];
return sum*dx;
}
int main(){
float res;
res=pi_4(128);
printf("pi = %lf\n", 4*res);
return 0;
}

A few more fixes are needed:
x1 needs to be declared with 4 elements.
The second for loop needs to increment by 4 (this is what caused the segfault).
There need to be 4 assignments to the x1 array.
These changes are all because single-precision packs 4 values into a 16-byte vector register while double-precision packs only 2 values. I think that was it:
#include <stdio.h>
#include <stdlib.h>
#include <xmmintrin.h>
inline float pi_4 (int n){
int i;
__m128 mypart2,x2, b, c, one;
float *x = (float *)malloc(n*sizeof(float));
float *mypart = (float*)malloc(n*sizeof(float));
float sum = 0.0;
float dx = 1.0/n;
float x1[4] __attribute__((aligned(16)));
one = _mm_set_ps1(1.0); // set one to (1,1,1,1)
for (i = 0; i < n; i++){
x[i] = dx/2 + dx*i;
}
for (i = 0; i < n; i+=4){
x1[0]=x[i]; x1[1]=x[i+1];
x1[2]=x[i+2]; x1[3]=x[i+3];
x2 = _mm_load_ps(x1);
b = _mm_mul_ps(x2,x2);
c = _mm_add_ps(b,one);
mypart2 = _mm_div_ps(one,c);
_mm_store_ps(&mypart[i], mypart2);
}
for (i = 0; i < n; i++)
sum += mypart[i];
return sum*dx;
}
int main(){
float res;
res=pi_4(128);
printf("pi = %lf\n", 4*res);
return 0;
}
Drum roll...
$ ./foo
pi = 3.141597
A word on the use of malloc(). I think most implementations will return memory aligned on a 16-byte boundary as required for SSE loads and stores, but that may not be guaranteed as __m128 is not a C/C++ type (it is guaranteed to be aligned for "normal" types). It would be safer to use memalign() or posix_memalign().

Related

Using Eigen class to sum certain numbers in a vector

I am new to C++ and I am using the Eigen library. I was wondering if there was a way to sum certain elements in a vector. For example, say I have a vector that is a 100 by 1 and I just want to sum the first 10 elements. Is there a way of doing that using the Eigen library?
What I am trying to do is this: say I have a vector that is 1000 by 1 and I want to take the mean of the first 10 elements, then the next 10 elements, and so on and store that in some vector. Hence I will have a vector of size 100 of the averages. Any thoughts or suggestions are greatly appreciated.
Here is the beginning steps I have in my code. I have a S_temp4vector that is 1000 by 1. Now I intialize a new vector S_A that I want to have as the vector of the means. Here is my messy sloppy code so far: (Note that my question resides in the crudeMonteCarlo function)
#include <iostream>
#include <cmath>
#include <math.h>
#include <Eigen/Dense>
#include <Eigen/Geometry>
#include <random>
#include <time.h>
using namespace Eigen;
using namespace std;
void crudeMonteCarlo(int N,double K, double r, double S0, double sigma, double T, int n);
VectorXd time_vector(double min, double max, int n);
VectorXd call_payoff(VectorXd S, double K);
int main(){
int N = 100;
double K = 100;
double r = 0.2;
double S0 = 100;
double sigma = 0.4;
double T = 0.1;
int n = 10;
crudeMonteCarlo(N,K,r,S0,sigma,T,n);
return 0;
}
VectorXd time_vector(double min, double max, int n){
VectorXd m(n + 1);
double delta = (max-min)/n;
for(int i = 0; i <= n; i++){
m(i) = min + i*delta;
}
return m;
}
MatrixXd generateGaussianNoise(int M, int N){
MatrixXd Z(M,N);
static random_device rd;
static mt19937 e2(time(0));
normal_distribution<double> dist(0.0, 1.0);
for(int i = 0; i < M; i++){
for(int j = 0; j < N; j++){
Z(i,j) = dist(e2);
}
}
return Z;
}
VectorXd call_payoff(VectorXd S, double K){
VectorXd C(S.size());
for(int i = 0; i < S.size(); i++){
if(S(i) - K > 0){
C(i) = S(i) - K;
}else{
C(i) = 0.0;
}
}
return C;
}
void crudeMonteCarlo(int N,double K, double r, double S0, double sigma, double T, int n){
// Create time vector
VectorXd tt = time_vector(0.0,T,n);
VectorXd t(n);
double dt = T/n;
for(int i = 0; i < n; i++){
t(i) = tt(i+1);
}
// Generate standard normal Z matrix
//MatrixXd Z = generateGaussianNoise(N,n);
// Generate the log normal stock process N times to get S_A for crude Monte Carlo
MatrixXd SS(N,n+1);
MatrixXd Z = generateGaussianNoise(N,n);
for(int i = 0; i < N; i++){
SS(i,0) = S0;
for(int j = 1; j <= n; j++){
SS(i,j) = SS(i,j-1)*exp((double) (r - pow(sigma,2.0))*dt + sigma*sqrt(dt)*(double)Z(i,j-1));
}
}
// This long bit of code gives me my S_A.....
Map<RowVectorXd> S_temp1(SS.data(), SS.size());
VectorXd S_temp2(S_temp1.size());
for(int i = 0; i < S_temp2.size(); i++){
S_temp2(i) = S_temp1(i);
}
VectorXd S_temp3(S_temp2.size() - N);
int count = 0;
for(int i = N; i < S_temp2.size(); i++){
S_temp3(count) = S_temp2(i);
count++;
}
VectorXd S_temp4(S_temp3.size());
for(int i = 0; i < S_temp4.size(); i++){
S_temp4(i) = S_temp3(i);
}
VectorXd S_A(N);
S_A(0) = (S_temp4(0) + S_temp4(1) + S_temp4(2) + S_temp4(3) + S_temp4(4) + S_temp4(5) + S_temp4(6) + S_temp4(7) + S_temp4(8) + S_temp4(9))/(n);
S_A(1) = (S_temp4(10) + S_temp4(11) + S_temp4(12) + S_temp4(13) + S_temp4(14) + S_temp4(15) + S_temp4(16) + S_temp4(17) + S_temp4(18) + S_temp4(19))/(n);
int count1 = 0;
for(int i = 0; i < S_temp4.size(); i++){
S_A(count1) =
}
// Calculate payoff of Asian option
//VectorXd call_fun = call_payoff(S_A,K);
}
This question includes a lot of code, which makes it hard to understand the question you're trying to ask. Consider including only the code specific to your question.
In any case, you can use Eigen directly to do all of these things quite simply. In Eigen, Vectors are just matrices with 1 column, so all of the reasoning here is directly applicable to what you've written.
const Eigen::Matrix<double, 100, 1> v = Eigen::Matrix<double, 100, 1>::Random();
const int num_rows = 10;
const int num_cols = 1;
const int starting_row = 0;
const int starting_col = 0;
const double sum_of_first_ten = v.block(starting_row, starting_col, num_rows, num_cols).sum();
const double mean_of_first_ten = sum_of_first_ten / num_rows;
In summary: You can use .block to get a block object, .sum() to sum that block, and then conventional division to get the mean.
You can reshape the input using Map and then do all sub-summations at once without any loop:
VectorXd A(1000); // input
Map<MatrixXd> B(A.data(), 10, A.size()/10); // reshaped version, no copy
VectorXd res = B.colwise().mean(); // partial reduction, you can also use .sum(), .minCoeff(), etc.
The Eigen documentation at https://eigen.tuxfamily.org/dox/group__TutorialBlockOperations.html says an Eigen block is a rectangular part of a matrix or array accessed by matrix.block(i,j,p,q) where i and j are the starting values (eg 0 and 0) and p and q are the block size (eg 10 and 1). Presumably you would then iterate i in steps of 10, and use std::accumulate or perhaps an explicit summation to find the mean of matrix.block(i,0,10,1).

Anderson Darling Test in C++

I am trying to compute the Anderson-Darling test found here. I followed the steps on Wikipedia and made sure that when I calculate the average and standard deviation of the data I am testing denoted X by using MATLAB. Also, I used a function called phi for computing the standard normal CDF, I have also tested this function to make sure it is correct which it is. Now I seem to have a problem when I actually compute the A-squared (denoted in Wikipedia, I denote it as A in C++).
Here is my function I made for Anderson-Darling Test:
void Anderson_Darling(int n, double X[]){
sort(X,X + n);
// Find the mean of X
double X_avg = 0.0;
double sum = 0.0;
for(int i = 0; i < n; i++){
sum += X[i];
}
X_avg = ((double)sum)/n;
// Find the variance of X
double X_sig = 0.0;
for(int i = 0; i < n; i++){
X_sig += (X[i] - X_avg)*(X[i] - X_avg);
}
X_sig /= n;
// The values X_i are standardized to create new values Y_i
double Y[n];
for(int i = 0; i < n; i++){
Y[i] = (X[i] - X_avg)/(sqrt(X_sig));
//cout << Y[i] << endl;
}
// With a standard normal CDF, we calculate the Anderson_Darling Statistic
double A = 0.0;
for(int i = 0; i < n; i++){
A += -n - 1/n *(2*(i) - 1)*(log(phi(Y[i])) + log(1 - phi(Y[n+1 - i])));
}
cout << A << endl;
}
Note, I know that the formula for Anderson-Darling (A-squared) starts with i = 1 to i = n, although when I changed the index to make it work in C++, I still get the same result without changing the index.
The value I get in C++ is:
-4e+006
The value I should get, received in MATLAB is:
0.2330
Any suggestions are greatly appreciated.
Here is my whole code:
#include <iostream>
#include <math.h>
#include <cmath>
#include <random>
#include <algorithm>
#include <chrono>
using namespace std;
double *Box_Muller(int n, double u[]);
double *Beasley_Springer_Moro(int n, double u[]);
void Anderson_Darling(int n, double X[]);
double phi(double x);
int main(){
int n = 2000;
double Mersenne[n];
random_device rd;
mt19937 e2(1);
uniform_real_distribution<double> dist(0, 1);
for(int i = 0; i < n; i++){
Mersenne[i] = dist(e2);
}
// Print Anderson Statistic for Mersenne 6a
double *result = new double[n];
result = Box_Muller(n,Mersenne);
Anderson_Darling(n,result);
return 0;
}
double *Box_Muller(int n, double u[]){
double *X = new double[n];
double Y[n];
double R_2[n];
double theta[n];
for(int i = 0; i < n; i++){
R_2[i] = -2.0*log(u[i]);
theta[i] = 2.0*M_PI*u[i+1];
}
for(int i = 0; i < n; i++){
X[i] = sqrt(-2.0*log(u[i]))*cos(2.0*M_PI*u[i+1]);
Y[i] = sqrt(-2.0*log(u[i]))*sin(2.0*M_PI*u[i+1]);
}
return X;
}
double *Beasley_Springer_Moro(int n, double u[]){
double y[n];
double r[n+1];
double *x = new double(n);
// Constants needed for algo
double a_0 = 2.50662823884; double b_0 = -8.47351093090;
double a_1 = -18.61500062529; double b_1 = 23.08336743743;
double a_2 = 41.39119773534; double b_2 = -21.06224101826;
double a_3 = -25.44106049637; double b_3 = 3.13082909833;
double c_0 = 0.3374754822726147; double c_5 = 0.0003951896511919;
double c_1 = 0.9761690190917186; double c_6 = 0.0000321767881768;
double c_2 = 0.1607979714918209; double c_7 = 0.0000002888167364;
double c_3 = 0.0276438810333863; double c_8 = 0.0000003960315187;
double c_4 = 0.0038405729373609;
// Set r and x to empty for now
for(int i = 0; i <= n; i++){
r[i] = 0.0;
x[i] = 0.0;
}
for(int i = 1; i <= n; i++){
y[i] = u[i] - 0.5;
if(fabs(y[i]) < 0.42){
r[i] = pow(y[i],2.0);
x[i] = y[i]*(((a_3*r[i] + a_2)*r[i] + a_1)*r[i] + a_0)/((((b_3*r[i] + b_2)*r[i] + b_1)*r[i] + b_0)*r[i] + 1);
}else{
r[i] = u[i];
if(y[i] > 0.0){
r[i] = 1.0 - u[i];
r[i] = log(-log(r[i]));
x[i] = c_0 + r[i]*(c_1 + r[i]*(c_2 + r[i]*(c_3 + r[i]*(c_4 + r[i]*(c_5 + r[i]*(c_6 + r[i]*(c_7 + r[i]*c_8)))))));
}
if(y[i] < 0){
x[i] = -x[i];
}
}
}
return x;
}
double phi(double x){
return 0.5 * erfc(-x * M_SQRT1_2);
}
void Anderson_Darling(int n, double X[]){
sort(X,X + n);
// Find the mean of X
double X_avg = 0.0;
double sum = 0.0;
for(int i = 0; i < n; i++){
sum += X[i];
}
X_avg = ((double)sum)/n;
// Find the variance of X
double X_sig = 0.0;
for(int i = 0; i < n; i++){
X_sig += (X[i] - X_avg)*(X[i] - X_avg);
}
X_sig /= (n-1);
// The values X_i are standardized to create new values Y_i
double Y[n];
for(int i = 0; i < n; i++){
Y[i] = (X[i] - X_avg)/(sqrt(X_sig));
//cout << Y[i] << endl;
}
// With a standard normal CDF, we calculate the Anderson_Darling Statistic
double A = -n;
for(int i = 0; i < n; i++){
A += -1.0/(double)n *(2*(i+1) - 1)*(log(phi(Y[i])) + log(1 - phi(Y[n - i])));
}
cout << A << endl;
}
Let me guess, your n was 2000. Right?
The major issue here is when you do 1/n in the last expression. 1 is an int and ao is n. When you divide 1 by n it performs integer division. Now 1 divided by any number > 1 is 0 under integer division (think if it as only keeping only integer part of the quotient. What you need to do is cast n as double by writing 1/(double)n.
Rest all should work fine.
Summary from discussions -
Indexes to Y[] should be i and n-1-i respectively.
n should not be added in the loop but only once.
Minor fixes like changing divisor to n instead of n-1 while calculating Variance.
You have integer division here:
A += -n - 1/n *(2*(i) - 1)*(log(phi(Y[i])) + log(1 - phi(Y[n+1 - i])));
^^^
1/n is zero when n > 1 - you need to change this to, e.g.: 1.0/n:
A += -n - 1.0/n *(2*(i) - 1)*(log(phi(Y[i])) + log(1 - phi(Y[n+1 - i])));
^^^^^

1-d harmonic oscillator recursive function, possible Infinit-loop, c++ syntax

Working on a 1-d Harmonic oscillator using recursive functions, code works in fortran and trying to convert to C++ in order to learn the new syntax.
I have fixed all the notified errors so far searching through google, but still not getting a result. The program just runs seemingly forever without posting results.
I think the likely answer is maybe its not being calculated at all or there some some infinit loop because of a syntax problem in my for loop or called functions? but I am not seeing it and it is not identifying an error with them.
Any advice on why this program is not working properly?
//
// main.cpp
// 1-d HO
//
// Created by Grant Metheny on 3/2/16.
// Copyright (c) 2016 Grant Metheny C++ Codes. All rights reserved.
//
#include <iostream>
#include <vector>
#include <string>
#include <fstream>
#include <cmath>
#include <math.h>
using namespace std;
int i = 0;
int n = 0;
double x = 1.;
double xmax = 5;
double imax = 1000;
double wavefunc = 0;
double fact = 1;
double hpol = 1;
double wf0 = 1;
double wf1 = 1;
double wf2 = 1;
double wf3 = 1;
double wf4 = 1;
double wf5 = 1;
double wf6 = 1;
double wavefunction(int n, double x)
{
return wavefunc = pow(2.0,-(n*.5)) * pow(M_PI,.25) * exp(-(.5*pow(x,2.0)));
}
double factorial(int n)
{
for (i = 0; i <= n; i++)
if (i == 0)
fact = 1.;
else
fact = fact * i;
return fact;
}
double hermite(int n, double x)
{
for (i = 0; i <= n; i++)
if (i==1)
hpol = 1.0;
else if (n==1)
hpol = 2*x;
else
hpol = 2*x*hermite(n-1,x) - 2*(n-1)*hermite(n-2,x);
return hpol;
}
double dx = 2*xmax/imax;
int main(int argc, const char * argv[]) {
for (i=0; i <= imax; i++) {
x = 5. - dx*i;
n = 0;
wf0 = hermite(n,x) * wavefunction(n,x) * pow(factorial(n),(-(.5)));
n = 1;
wf1 = hermite(n,x) * wavefunction(n,x) * pow(factorial(n),(-(.5)));
n = 2;
wf2 = hermite(n,x) * wavefunction(n,x) * pow(factorial(n),(-(.5)));
n = 3;
wf3 = hermite(n,x) * wavefunction(n,x) * pow(factorial(n),(-(.5)));
n = 4;
wf4 = hermite(n,x) * wavefunction(n,x) * pow(factorial(n),(-(.5)));
n = 5;
wf5 = hermite(n,x) * wavefunction(n,x) * pow(factorial(n),(-(.5)));
n = 6;
wf6 = hermite(n,x) * wavefunction(n,x) * pow(factorial(n),(-(.5)));
cout <<"I="<< i <<"X="<< x <<"WF0="<< wf0<<"WF1=" << wf1; // wf2, wf3, wf4, wf5, wf6
}
return 0;
}
Your for loops are not terminating. Instead of this:
for (i = 0; n; i++)
(which is clearly Fortran-inspired), you need to do this:
for (i = 0; i < n; i++)
The idea is that the second part of the for loop is not a limit; it is a predicate that should evaluate to true (to keep the loop going one more iteration) or false (to exit the loop). Because C/C++ treats non-zero integer values as true whenever a true/false value is expected, the loops just keep on going.
You need to make that correction in several places in your code.

C++: What is wrong with the output?

This code should output 0 0.25 0.5 0.75 1, instead it outputs zeros. Why is that?
Define a function u(x)=x;
void pde_advect_IC(double* x, double* u)
{
int N = sizeof(x) / sizeof(x[0]); //size of vector u
for (int i = 0; i <= N; i++)
u[i] = x[i];
}
Here is the implementation:
int main()
{
double a = 0.0;
double b = 1.0;
int nx = 4;
double dx = (b - a) / double(nx);
double xx[nx + 1]; //array xx with intervals
// allocate memory for vectors of solutions u0
double* u0 = new double [nx + 1];
//fill in array x
for (int i = 0; i <= nx; i++)
xx[i] = a + double(i) * dx;
pde_advect_IC(xx, u0); // u0 = x (initial conditions)
for (int i = 0; i <= nx; i++)
cout<<u0[i]<<endl;
// de-allocate memory of u0
delete [] u0;
delete [] u1;
return 0;
}
You can't use sizeof(x) because that will return the size of the pointer, not the array you thought you passed to it. You have to specify the size with a third parameter or use something more convenient like an std::vector and use size().
This works.
#include <iostream>
#include <cstdlib>
using namespace std;
void pde_advect_IC(double* x, double* u, const int& N)
{
for (int i = 0; i < N; i++)
u[i] = x[i];
}
int main()
{
double a = 0.0;
double b = 1.0;
int nx = 4;
double dx = (b - a) / double(nx);
double xx[nx + 1]; //array xx with intervals
// allocate memory for vectors of solutions u0
double* u0 = new double [nx + 1];
//fill in array x
for (int i = 0; i <= nx; i++)
xx[i] = a + double(i) * dx;
pde_advect_IC(xx, u0, nx + 1); // u0 = x (initial conditions)
for (int i = 0; i <= nx; i++)
cout << u0[i] << endl;
// de-allocate memory of u0
delete [] u0;
return 0;
}
Note that I added const int& N to pde_advect_IC() in order to pass it the size of the array, by const reference, to be sure it does not get modified by mistake.
Note that your trick with sizeof() does not work with pointers.

Perlin noise, blocky,c++

Okay first of all, I am trying to implement the Perlin noise algorithm, and I managed to achived something strange, and I can't find the solution. I am using matlab to visualize the results I have already checked this question:
"Blocky" Perlin noise
I am doing it from this website:
http://freespace.virgin.net/hugo.elias/models/m_perlin.htm
And another website which I can't find right now but I will update as soon as I can.
So here are some pictures about the problem:
This is the problem if increase zoom
http://i.stack.imgur.com/KkD7u.png
And here are the .cpp-s:
//perlin.cpp
#include "Perlin_H.h"
#include <stdlib.h>
#include <math.h>
#include <iostream>
#include <random>
using namespace std;
double Perlin::interp1(double a, double b, double x) {
double ft = x * 3.1415927;
double f = (1.0-cos(ft)) * 0.5;
//return (b-x > b-1/2) ? b-x : a+x;
return a * (1.0-f) + b * f;
}
double Perlin::smoothNoise(double x,double y) {
double corners = ( rand2(x-1, y-1)+rand2(x+1, y-1)+rand2(x-1, y+1)+rand2(x+1, y+1) ) / 16;
double sides = ( rand2(x-1, y) +rand2(x+1, y) +rand2(x, y-1) +rand2(x, y+1) ) / 8;
double center = rand2(x,y)/4;
return corners + sides +center;
}
double Perlin::lininterp1(double a,double b, double x) {
return a*(1-x) + b * x;
}
double Perlin::rand2(double x, double y) {
int n = (int)x + (int)y*57;
//n=pow((n<<13),n);
n=(n<<13)^n;
return ( 1.0 - ( (n * (n * n * 15731 + 789221) + 1376312589) & 0x7fffffff) / 1073741824.0);
}
double Perlin::noise(double x, double y) {
double floorX = (double)floor(x);
double floorY = (double)floor(y);
double s,t,u,v;
s = smoothNoise(floorX,floorY);
t = smoothNoise(floorX+1,floorY);
u = smoothNoise(floorY,floorY+1);
v = smoothNoise(floorX+1,floorY+1);
double int1 = interp1(s,t,x-floorX);
double int2 = interp1(u,v,x-floorX);
return interp1(int1,int2,y-floorY);
}
//main.cpp
#include "Perlin_H.h"
#include <stdlib.h>
#include <math.h>
#include <iostream>
#include <fstream>;
using namespace std;
int main() {
const int h=64,w=64,octaves=2;
double p=1/1;
double zoom = 30;
Perlin perlin;
double map[h][w];
ofstream output;
output.open("map.txt");
for(int i = 0; i < h ; i++) {
for(int j = 0; j < w ; j++) {
map[i][j] = 0;
}
}
double freq = 2;
for(int i = 0; i < h ; i++) {
for(int j = 0; j < w ; j++) {
double getnoise = 0;
for(int a=0; a < octaves; a++) {
double freq = pow(2,a);
double amp = pow(p,a);
getnoise = perlin.noise((((double)i)*freq)/zoom-(a*10),
((((double)j))*freq)/zoom+(a*10))*amp;
int color = (int)((getnoise * 128.0) + 128.0);
if(color > 255) color = 255;
if(color < 0) color = 0;
map[i][j] = color;
}
output << map[i][j] << "\t";
}
output << "\n";
}
output.close();
system("PAUSE");
return 0;
}
It's a typo!
s = smoothNoise(floorX,floorY);
t = smoothNoise(floorX+1,floorY);
u = smoothNoise(floorY,floorY+1);
v = smoothNoise(floorX+1,floorY+1);
Try:
u = smoothNoise(floorX, floorY +1)
This explains why the diagonal didn't have the blocky appearance (where x=y), and why many of the common feature shapes are subtly off in a mirrored and skewed fashion.
Since it is generally obvious that rand2(floor(y), floor(y)+1) != rand2(floor(x), floor(y+1)) the cell discontinuity will result.
Finding no mathematical error in your implementation, I suspect this is a number format issue.
Such block patterns are created when the grid point values are not actually the same when fetched from different sides - when rand2(floor(n) +1 ,y) != rand2(floor(n+1) ,y)
To fix it, declare floorX to be an int or long instead, and pass it as such to smoothNoise() and rand2().
This can happen due to floating point error in the representation of the Integer values floorX , floorX + 1. The epsilon of magnitude ulp or less can have either sign. the results of addition [floor(n) + 1] and flooring directly [floor(n+1)] are bound by different code, and so need not share a pattern of choosing which side to err on. When the results err on different sides, the int type cast strips the 0.99999999999 and the 0.0000000001 equally, treating the mathematically equivalent numbers as different.