The following fluid simulation is a translation of a paper by Stam. Something truly terrible has happened. Each time the program is run with a low DIFF=0.01, the values start off small and then rapidly expand, or "blow up". I have checked the math routines carefully. Since the code starts off with one 0.5, mathematically it is multiplying and adding a bunch of zeros, so the end result should be close to zero density and other vectors.
The code is quite long, so I've separated it into chunks and removed extra code. Minus all the beginning and SDL code there are only about 120 lines. I have spent a few hours trying changes to no avail, so help is greatly appreciated.
After some experimentation I believe there may be some floating-point error when DIFF is set too low. When the value is increased from 0.01 to 0.02, the values don't blow up. I don't think this is the entire issue, though.
To be clear, the current answers by 1201ProgramAlarm and vidstige do not resolve the problem.
Sections in bold are important parts, the rest is for completeness.
Beginning stuff, skip
#include <SDL2/SDL.h>
#include <stdio.h>
#include <iostream>
#include <algorithm>
#define IX(i,j) ((i)+(N+2)*(j))
using namespace std;
// Constants
const int SCREEN_WIDTH = 600;
const int SCREEN_HEIGHT = 600; // Should match SCREEN_WIDTH
const int N = 20; // Grid size
const int SIM_LEN = 1000;
const int DELAY_LENGTH = 40; // ms
const float VISC = 0.01;
const float dt = 0.1;
const float DIFF = 0.01;
const bool DISPLAY_CONSOLE = false; // Console or graphics
const bool DRAW_GRID = false; // implement later
const int nsize = (N+2)*(N+2);
Math routines Diffuse routines divide by 1+4*a. Does this imply density must be <= 1?
void set_bnd(int N, int b, vector<float> &x)
{
// removed
}
inline void lin_solve(int N, int b, vector<float> &x, vector<float> &x0, float a, float c)
{
for (int k=0; k<20; k++)
{
for (int i=1; i<=N; i++)
{
for (int j=1; j<=N; j++)
{
x[IX(i,j)] = (x0[IX(i,j)] + a*(x[IX(i-1,j)]+x[IX(i+1,j)]+x[IX(i,j-1)]+x[IX(i,j+1)])) / c;
}
}
set_bnd ( N, b, x );
}
}
// Add forces
void add_source(vector<float> &x, vector<float> &s, float dt)
{
for (int i=0; i<nsize; i++) x[i] += dt*s[i];
}
// Diffusion with Gauss-Seidel relaxation
void diffuse(int N, int b, vector<float> &x, vector<float> &x0, float diff, float dt)
{
float a = dt*diff*N*N;
lin_solve(N, b, x, x0, a, 1+4*a);
}
// Backwards advection
void advect(int N, int b, vector<float> &d, vector<float> &d0, vector<float> &u, vector<float> &v, float dt)
{
float dt0 = dt*N;
for (int i=1; i<=N; i++)
{
for (int j=1; j<=N; j++)
{
float x = i - dt0*u[IX(i,j)];
float y = j - dt0*v[IX(i,j)];
if (x<0.5) x=0.5; if (x>N+0.5) x=N+0.5;
int i0=(int)x; int i1=i0+1;
if (y<0.5) y=0.5; if (y>N+0.5) y=N+0.5;
int j0=(int)y; int j1=j0+1;
float s1 = x-i0; float s0 = 1-s1; float t1 = y-j0; float t0 = 1-t1;
d[IX(i,j)] = s0*(t0*d0[IX(i0,j0)] + t1*d0[IX(i0,j1)]) +
s1*(t0*d0[IX(i1,j0)] + t1*d0[IX(i1,j1)]);
}
}
set_bnd(N, b, d);
}
}
void project(int N, vector<float> &u, vector<float> &v, vector<float> &p, vector<float> &div)
{
float h = 1.0/N;
for (int i=1; i<=N; i++)
{
for (int j=1; j<=N; j++)
{
div[IX(i,j)] = -0.5*h*(u[IX(i+1,j)] - u[IX(i-1,j)] +
v[IX(i,j+1)] - v[IX(i,j-1)]);
p[IX(i,j)] = 0;
}
}
set_bnd(N, 0, div); set_bnd(N, 0, p);
lin_solve(N, 0, p, div, 1, 4);
for (int i=1; i<=N; i++)
{
for (int j=1; j<=N; j++)
{
u[IX(i,j)] -= 0.5*(p[IX(i+1,j)] - p[IX(i-1,j)])/h;
v[IX(i,j)] -= 0.5*(p[IX(i,j+1)] - p[IX(i,j-1)])/h;
}
}
set_bnd(N, 1, u); set_bnd(N, 2, v);
}
Density and velocity solver
// Density solver
void dens_step(int N, vector<float> &x, vector<float> &x0, vector<float> &u, vector<float> &v, float diff, float dt)
{
add_source(x, x0, dt);
swap(x0, x); diffuse(N, 0, x, x0, diff, dt);
swap(x0, x); advect(N, 0, x, x0, u, v, dt);
}
// Velocity solver: addition of forces, viscous diffusion, self-advection
void vel_step(int N, vector<float> &u, vector<float> &v, vector<float> &u0, vector<float> &v0, float visc, float dt)
{
add_source(u, u0, dt); add_source(v, v0, dt);
swap(u0, u); diffuse(N, 1, u, u0, visc, dt);
swap(v0, v); diffuse(N, 2, v, v0, visc, dt);
project(N, u, v, u0, v0);
swap(u0, u); swap(v0, v);
advect(N, 1, u, u0, u0, v0, dt); advect(N, 2, v, v0, u0, v0, dt);
project(N, u, v, u0, v0);
}
I considered floating-point inconsistencies, but after compiling with -ffloat-store the problem still persisted.
The problem is related to a lack of normalization in add_source().
When your density becomes sufficiently stationary (x0 very similar in distribution to x, up to a scale factor), then add_source() effectively multiplies x by about 1+dt, leading to your exponential blowup. High values of DIFF mask this effect by weighing x more heavily over x0 in lin_solve(), meaning that the effective multiplier is brought closer to 1, but is still above 1.
The effect, then is that with every iteration, more mass is added. If it cannot "spread out" fast enough at the edges, it will start piling up. Once the density becomes perfectly stationary, it will increase in mass at an exponential rate determined by 1+dt/(4a).
With your given settings (dt=0.1, a=0.1*0.01*20*20=0.4), this is 1+0.1/1.6 ~ 1.06.
The fix is to normalize in add_source:
x[i] = (x[i]+dt*s[i])/(1.0f+dt);
, or to compute the c argument to lin_solve() as 1+4*a+dt. Either will force the mass to drop.
One source of trouble is in lin_solve. Your i and j loops start at zero, but you reference IX(i-1,j), which will access the out of bounds array element x[-1].
Seeing this I immediately felt I had to answer. I read this article way back when it was published. I've implemented his stuff on Android and just love it. I even met the fellow when speaking at UmeƄ in the early 2000s, and he's a very friendly fellow. And tall. :)
So to the problem. You are not doing a velocity propagation step, I think this is critical for not "blowing up" if I remember correctly.
I'm having trouble working out the time for my two maxsubarray functions to run. (right at the bottom of the code)
The output it gives me:
Inputsize: 101 Time using Brute Force:0 Time Using DivandCon: 12
is correct for the second time I use clock() but for the first difference diff1 it just gives me 0 and I'm not sure why?
Edit: Revised Code.
Edit2: Added Output.
#include <iostream>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <limits.h>
using namespace std;
int Kedane(int a[], int size)
{
int max_so_far = 0, max_ending_here = 0;
int i;
for(i = 0; i < size; i++)
{
max_ending_here = max_ending_here + a[i];
if(max_ending_here < 0)
max_ending_here = 0;
if(max_so_far < max_ending_here)
max_so_far = max_ending_here;
}
return max_so_far;
}
int BruteForce(int array[],int n)
{
int sum,ret=0;
for(int j=-1;j<=n-2;j++)
{
sum=0;
for(int k=j+1;k<+n-1;k++)
{
sum+=array[k];
if(sum>ret)
{
ret=sum;
}
}
}
return ret;
}
//------------------------------------------------------
// FUNCTION WHICH FINDS MAX OF 2 INTS
int max(int a, int b) { return (a > b)? a : b; }
// FUNCTION WHICH FINDS MAX OF 3 NUMBERS
// CALL MAX FUNCT FOR 2 VARIS TWICE!
int max(int a, int b, int c) { return max(max(a, b), c); }
// WORKS OUT FROM MIDDLE+1->RIGHT THE MAX SUM &
// THE MAX SUM FROM MIDDLE->LEFT + RETURNS SUM OF THESE
int maxCrossingSum(int arr[], int l, int m, int h)
{
int sum = 0; // LEFT OF MID
int LEFTsum = INT_MIN; // INITIALLISES SUM TO LOWEST POSSIBLE INT
for (int i = m; i >= l; i--)
{
sum = sum + arr[i];
if (sum > LEFTsum)
LEFTsum = sum;
}
sum = 0; // RIGHT OF MID
int RIGHTsum = INT_MIN;
for (int i = m+1; i <= h; i++)
{
sum = sum + arr[i];
if (sum > RIGHTsum)
RIGHTsum = sum;
}
// RETURN SUM OF BOTH LEFT AND RIGHT SIDE MAX'S
return LEFTsum + RIGHTsum;
}
// Returns sum of maxium sum subarray in aa[l..h]
int maxSubArraySum(int arr[], int l, int h)
{
// Base Case: Only one element
if (l == h)
return arr[l];
// Find middle point
int m = (l + h)/2;
/* Return maximum of following three possible cases
a) Maximum subarray sum in left half
b) Maximum subarray sum in right half
c) Maximum subarray sum such that the subarray crosses the midpoint */
return max(maxSubArraySum(arr, l, m),
maxSubArraySum(arr, m+1, h),
maxCrossingSum(arr, l, m, h));
}
// DRIVER
int main(void)
{
std::srand (time(NULL));
// CODE TO FILL ARRAY WITH RANDOMS [-50;50]
int size=30000;
int array[size];
for(int i=0;i<=size;i++)
{
array[i]=(std::rand() % 100) -50;
}
// TIMING VARI'S
clock_t t1,t2;
clock_t A,B;
clock_t K1,K2;
volatile int mb, md, qq;
//VARYING ELEMENTS IN THE ARRAY
for(int n=101;n<size;n=n+100)
{
t1=clock();
mb=BruteForce(array,n);
t2=clock();
A=clock();
md=maxSubArraySum(array, 0, n-1) ;
B=clock();
K1=clock();
qq=Kedane(array, n);
K2=clock();
cout<< n << "," << (double)t2-(double)t1 << ","<<(double)B-(double)A << ","<<(double)K2-(double)K1<<endl;
}
return 0;
}
101,0,0,0
201,0,0,0
301,1,0,0
401,0,0,0
501,0,0,0
601,0,0,0
701,0,0,0
801,1,0,0
901,1,0,0
1001,0,0,0
1101,1,0,0
1201,1,0,0
1301,0,0,0
1401,1,0,0
1501,1,0,0
1601,2,0,0
1701,1,0,0
1801,2,0,0
1901,1,1,0
2001,1,0,0
2101,2,0,0
2201,3,0,0
2301,2,0,0
2401,3,0,0
2501,3,0,0
2601,3,0,0
2701,4,0,0
2801,4,0,0
2901,4,0,0
3001,4,0,0
3101,4,0,0
3201,5,0,0
3301,5,0,0
3401,6,0,0
3501,5,0,0
3601,6,0,0
3701,6,0,0
3801,8,0,0
3901,7,0,0
4001,8,0,0
4101,7,0,0
4201,10,1,0
4301,9,0,0
4401,8,0,0
4501,9,0,0
4601,10,0,0
4701,11,0,0
4801,11,0,0
4901,11,0,0
5001,12,0,1
5101,11,1,0
5201,13,0,0
5301,13,0,0
5401,15,0,0
5501,14,0,0
5601,16,0,0
5701,15,0,0
5801,15,1,0
5901,16,0,0
6001,17,0,0
6101,18,0,0
6201,18,0,0
6301,19,0,0
6401,21,0,0
6501,19,0,0
6601,21,1,0
6701,20,0,0
6801,22,0,0
6901,23,0,0
7001,22,0,0
7101,24,0,0
7201,26,0,0
7301,26,0,0
7401,24,1,0
7501,26,0,0
7601,27,0,0
7701,28,0,0
7801,28,0,0
7901,30,0,0
8001,29,0,0
8101,31,0,0
8201,31,1,0
8301,35,0,0
8401,33,0,0
8501,35,0,0
8601,35,1,0
8701,35,0,0
8801,36,1,0
8901,37,0,0
9001,38,0,0
9101,39,0,0
9201,41,1,0
9301,40,0,0
9401,41,0,0
9501,42,0,0
9601,45,0,0
9701,45,0,0
9801,44,0,0
9901,47,0,0
10001,47,0,0
10101,48,0,0
10201,50,0,0
10301,51,0,0
10401,50,0,0
10501,51,0,0
10601,53,0,0
10701,55,0,0
10801,54,0,0
10901,56,0,0
11001,57,0,0
11101,56,0,0
11201,60,0,0
11301,60,0,0
11401,61,1,0
11501,61,1,0
11601,63,0,0
11701,62,1,0
11801,66,1,0
11901,65,0,0
12001,68,1,0
12101,68,0,0
12201,70,0,0
12301,71,0,0
12401,72,0,0
12501,73,1,0
12601,73,1,0
12701,76,0,0
12801,77,0,0
12901,78,1,0
13001,79,1,0
13101,80,0,0
13201,83,0,0
13301,82,0,0
13401,86,0,0
13501,85,1,0
13601,86,0,0
13701,89,0,0
13801,90,0,1
13901,90,0,0
14001,91,0,0
14101,97,0,0
14201,93,0,0
14301,96,0,0
14401,99,0,0
14501,100,0,0
14601,101,0,0
14701,101,0,0
14801,103,1,0
14901,104,0,0
15001,107,0,0
15101,108,0,0
15201,109,0,0
15301,109,0,0
15401,114,0,0
15501,114,0,0
15601,115,0,0
15701,116,0,0
15801,119,0,0
15901,118,0,0
16001,124,0,0
16101,123,1,0
16201,123,1,0
16301,125,0,0
16401,127,1,0
16501,128,1,0
16601,131,0,0
16701,132,0,0
16801,134,0,0
16901,134,1,0
17001,135,1,0
17101,139,0,0
17201,139,0,0
17301,140,1,0
17401,143,0,0
17501,145,0,0
17601,147,0,0
17701,147,0,0
17801,150,1,0
17901,152,1,0
18001,153,0,0
18101,155,0,0
18201,157,0,0
18301,157,1,0
18401,160,0,0
18501,160,1,0
18601,163,1,0
18701,165,0,0
18801,169,0,0
18901,171,0,1
19001,170,1,0
19101,173,1,0
19201,178,0,0
19301,175,1,0
19401,176,1,0
19501,180,0,0
19601,180,1,0
19701,182,1,0
19801,184,0,0
19901,187,1,0
20001,188,1,0
20101,191,0,0
20201,192,1,0
20301,193,1,0
20401,195,0,0
20501,199,0,0
20601,200,0,0
20701,201,0,0
20801,209,1,0
20901,210,0,0
21001,206,0,0
21101,210,0,0
21201,210,0,0
21301,213,0,0
21401,215,1,0
21501,217,1,0
21601,218,1,0
21701,221,1,0
21801,222,1,0
21901,226,1,0
22001,225,1,0
22101,229,0,0
22201,232,0,0
22301,233,1,0
22401,234,1,0
22501,237,1,0
22601,238,0,1
22701,243,0,0
22801,242,1,0
22901,246,1,0
23001,246,0,0
23101,250,1,0
23201,250,1,0
23301,254,1,0
23401,254,0,0
23501,259,0,1
23601,260,1,0
23701,263,1,0
23801,268,0,0
23901,266,1,0
24001,271,0,0
24101,272,1,0
24201,274,1,0
24301,280,0,1
24401,279,0,0
24501,281,0,0
24601,285,0,0
24701,288,0,0
24801,289,0,0
24901,293,0,0
25001,295,1,0
25101,299,1,0
25201,299,1,0
25301,302,0,0
25401,305,1,0
25501,307,0,0
25601,310,1,0
25701,315,0,0
25801,312,1,0
25901,315,0,0
26001,320,1,0
26101,320,0,0
26201,322,0,0
26301,327,1,0
26401,329,0,0
26501,332,1,0
26601,339,1,0
26701,334,1,0
26801,337,0,0
26901,340,0,0
27001,341,1,0
27101,342,1,0
27201,347,0,0
27301,348,1,0
27401,351,1,0
27501,353,0,0
27601,356,1,0
27701,360,0,1
27801,361,1,0
27901,362,1,0
28001,366,1,0
28101,370,0,1
28201,372,0,0
28301,375,1,0
28401,377,1,0
28501,380,0,0
28601,384,1,0
28701,384,0,0
28801,388,1,0
28901,391,1,0
29001,392,1,0
29101,399,1,0
29201,399,0,0
29301,404,1,0
29401,405,0,0
29501,409,1,0
29601,412,2,0
29701,412,1,0
29801,422,1,0
29901,419,1,0
The return values from BruteForce and maxSubArraySum are never used, and this gives the compiler a lot of lattitude when it comes to optimizing them.
On my machine for example, using clang -O3 reduces the call to BruteForce to a vector copy and nothing else.
One method for forcing the evaluation of these functions is to write their results to volatile variables:
volatile int mb, md;
// ...
mb = BruteForce(array, n);
// ...
md = maxSubArraySum(array, 0, n-1);
As the variables are volatile, the value given by the right-hand side of the assignments must be stored, despite the absence of any other side-effects, which prevents the compiler from optimising the computation away.