add some fluid sim work
All checks were successful
studiorailgun/Renderer/pipeline/head This commit looks good

This commit is contained in:
austin 2024-12-06 12:01:43 -05:00
parent e6e469bec1
commit bd80b09d41
3 changed files with 303 additions and 20 deletions

View File

@ -77,9 +77,17 @@ typedef struct {
#define SPARSE_ARRAY_REAL_DATA_SIZE (SPARSE_ARRAY_CHUNK_DIM * MAIN_ARRAY_DIM) #define SPARSE_ARRAY_REAL_DATA_SIZE (SPARSE_ARRAY_CHUNK_DIM * MAIN_ARRAY_DIM)
/** /**
* The size of the dimension of the memory of the sparse array * Size of the cells we care about in the sparse array
*/ */
#define SPARSE_ARRAY_RAW_DIM (SPARSE_ARRAY_REAL_DATA_SIZE + SPARSE_ARRAY_BORDER_SIZE) #define SPARSE_ARRAY_ACTUAL_DATA_DIM (SPARSE_ARRAY_REAL_DATA_SIZE + SPARSE_ARRAY_BORDER_SIZE)
/**
* The size of the dimension of the memory of the sparse array
* While the SPARSE_ARRAY_ACTUAL_DATA_DIM may come out to 114, it will be more efficient CPU-instruction wise
* if we round that up to 128+2
* That way we can call highly vectorized functions (ie avx128 instead of avx64+avx32+avx16+2)
*/
#define SPARSE_ARRAY_RAW_DIM (128 + SPARSE_ARRAY_BORDER_SIZE)
/** /**
* The size of a sparse array in number of elements * The size of a sparse array in number of elements
@ -94,7 +102,7 @@ typedef struct {
/** /**
* The size of the sparse array in spatial units * The size of the sparse array in spatial units
*/ */
#define SPARSE_ARRAY_SPATIAL_SIZE (SPARSE_ARRAY_RAW_DIM * SPATIAL_UNIT) #define SPARSE_ARRAY_SPATIAL_SIZE (SPARSE_ARRAY_ACTUAL_DATA_DIM * SPATIAL_UNIT)
/** /**
* A set of sparse matricies for simulating fluids * A set of sparse matricies for simulating fluids

View File

@ -32,12 +32,12 @@ LIBRARY_API void fluid_sparse_array_diffuse(int b, float * x, float * x0, float
/** /**
* Advects an array * Advects an array
*/ */
LIBRARY_API void fluid_sparse_array_advect(int N, int b, float * d, float * d0, float * u, float * v, float * w, float dt); LIBRARY_API void fluid_sparse_array_advect(int b, float * d, float * d0, float * u, float * v, float * w, float dt);
/** /**
* Projects an array * Projects an array
*/ */
LIBRARY_API void fluid_sparse_array_project(int N, float * u, float * v, float * w, float * p, float * div); LIBRARY_API void fluid_sparse_array_project(float * u, float * v, float * w, float * p, float * div);
/** /**
* Sets the bounds of the simulation * Sets the bounds of the simulation
@ -61,7 +61,7 @@ LIBRARY_API void fluid_sparse_array_dens_step(float * x, float * x0, float * u,
/** /**
* Performs the velocity step * Performs the velocity step
*/ */
LIBRARY_API void fluid_sparse_array_vel_step(int N, float * u, float * v, float * w, float * u0, float * v0, float * w0, float visc, float dt); LIBRARY_API void fluid_sparse_array_vel_step(float * u, float * v, float * w, float * u0, float * v0, float * w0, float visc, float dt);
/** /**
* Solves a linear system of equations in a vectorized manner * Solves a linear system of equations in a vectorized manner
@ -69,6 +69,14 @@ LIBRARY_API void fluid_sparse_array_vel_step(int N, float * u, float * v, float
* @param x The array that will contain the solved equations * @param x The array that will contain the solved equations
* @param x0 The array containing the first order derivatives * @param x0 The array containing the first order derivatives
*/ */
LIBRARY_API void fluid_sparse_array_lin_solve(int b, float * x, float * x0); LIBRARY_API void fluid_sparse_array_lin_solve_diffuse(int b, float * x, float * x0);
/**
* Solves a linear system of equations in a vectorized manner
* @param b The axis to set the bounds along
* @param x The array that will contain the solved equations
* @param x0 The array containing the first order derivatives
*/
LIBRARY_API void fluid_sparse_array_lin_solve_project(int b, float * x, float * x0);
#endif #endif

View File

@ -11,11 +11,37 @@
#define LINEARSOLVERTIMES 10 #define LINEARSOLVERTIMES 10
#define FLUID_SIM_TIMESTEP 0.01
#define DIFFUSE_CONSTANT 0.00001
#define VISCOSITY_CONSTANT 0.00001
/** /**
* Simulates a sparse array * Simulates a sparse array
*/ */
LIBRARY_API int fluid_sparse_array_simulate(SparseChunkArray * array, float dt){ LIBRARY_API int fluid_sparse_array_simulate(SparseChunkArray * array, float dt){
//velocity step
fluid_sparse_array_vel_step(
array->u,
array->v,
array->w,
array->u0,
array->v0,
array->w0,
VISCOSITY_CONSTANT,
FLUID_SIM_TIMESTEP
);
//density step
fluid_sparse_array_dens_step(
array->d,
array->d0,
array->u,
array->v,
array->w,
DIFFUSE_CONSTANT,
FLUID_SIM_TIMESTEP
);
} }
@ -39,14 +65,51 @@ LIBRARY_API void fluid_sparse_array_add_source(float * x, float * s, float dt){
* Diffuses a given array by a diffusion constant * Diffuses a given array by a diffusion constant
*/ */
LIBRARY_API void fluid_sparse_array_diffuse(int b, float * x, float * x0, float diff, float dt){ LIBRARY_API void fluid_sparse_array_diffuse(int b, float * x, float * x0, float diff, float dt){
float a=dt*diff*SPARSE_ARRAY_SPATIAL_SIZE*SPARSE_ARRAY_SPATIAL_SIZE*SPARSE_ARRAY_SPATIAL_SIZE; fluid_sparse_array_lin_solve_diffuse(b, x, x0);
fluid_sparse_array_lin_solve(b, x, x0);
} }
/** /**
* Advects a given array based on the force vectors in the simulation * Advects a given array based on the force vectors in the simulation
*/ */
LIBRARY_API void fluid_sparse_array_advect(int N, int b, float * d, float * d0, float * u, float * v, float * w, float dt){ LIBRARY_API void fluid_sparse_array_advect(int b, float * d, float * d0, float * u, float * v, float * w, float dt){
int i, j, k, i0, j0, k0, i1, j1, k1;
float x, y, z, s0, t0, s1, t1, u1, u0, dtx,dty,dtz;
dtx=dty=dtz=dt*SPARSE_ARRAY_ACTUAL_DATA_DIM;
for(k=(SPARSE_ARRAY_BORDER_SIZE / 2); k<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); k++){
for(j=(SPARSE_ARRAY_BORDER_SIZE / 2); j<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); j++){
for(i=(SPARSE_ARRAY_BORDER_SIZE / 2); i<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); i++){
x = i-dtx*u[IX(i,j,k)]; y = j-dty*v[IX(i,j,k)]; z = k-dtz*w[IX(i,j,k)];
if (x<0.5f) x=0.5f; if (x>SPARSE_ARRAY_RAW_DIM+0.5f) x=SPARSE_ARRAY_RAW_DIM+0.5f; i0=(int)x; i1=i0+1;
if (y<0.5f) y=0.5f; if (y>SPARSE_ARRAY_RAW_DIM+0.5f) y=SPARSE_ARRAY_RAW_DIM+0.5f; j0=(int)y; j1=j0+1;
if (z<0.5f) z=0.5f; if (z>SPARSE_ARRAY_RAW_DIM+0.5f) z=SPARSE_ARRAY_RAW_DIM+0.5f; k0=(int)z; k1=k0+1;
s1 = x-i0; s0 = 1-s1; t1 = y-j0; t0 = 1-t1; u1 = z-k0; u0 = 1-u1;
if(i0 >= SPARSE_ARRAY_RAW_DIM){
i0 = SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2);
}
if(j0 >= SPARSE_ARRAY_RAW_DIM){
j0 = SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2);
}
if(k0 >= SPARSE_ARRAY_RAW_DIM){
k0 = SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2);
}
if(i1 >= SPARSE_ARRAY_RAW_DIM){
i1 = SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2);
}
if(j1 >= SPARSE_ARRAY_RAW_DIM){
j1 = SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2);
}
if(k1 >= SPARSE_ARRAY_RAW_DIM){
k1 = SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2);
}
d[IX(i,j,k)] = s0*(t0*u0*d0[IX(i0,j0,k0)]+t1*u0*d0[IX(i0,j1,k0)]+t0*u1*d0[IX(i0,j0,k1)]+t1*u1*d0[IX(i0,j1,k1)])+
s1*(t0*u0*d0[IX(i1,j0,k0)]+t1*u0*d0[IX(i1,j1,k0)]+t0*u1*d0[IX(i1,j0,k1)]+t1*u1*d0[IX(i1,j1,k1)]);
}
}
}
fluid_sparse_array_set_bnd(b, d);
} }
/** /**
@ -63,20 +126,161 @@ LIBRARY_API void fluid_sparse_array_dens_step(float * x, float * x0, float * u,
fluid_sparse_array_add_source(x, x0, dt); fluid_sparse_array_add_source(x, x0, dt);
SWAP(x0, x); SWAP(x0, x);
fluid_sparse_array_diffuse(0, x, x0, diff, dt); fluid_sparse_array_diffuse(0, x, x0, diff, dt);
// SWAP(x0, x); SWAP(x0, x);
// fluid_sparse_array_advect(N, 0, x, x0, u, v, w, dt); fluid_sparse_array_advect(0, x, x0, u, v, w, dt);
} }
/** /**
* The main velocity step function * The main velocity step function
*/ */
LIBRARY_API void fluid_sparse_array_vel_step(int N, float * u, float * v, float * w, float * u0, float * v0, float * w0, float visc, float dt){ LIBRARY_API void fluid_sparse_array_vel_step(float * u, float * v, float * w, float * u0, float * v0, float * w0, float visc, float dt){
fluid_sparse_array_add_source(u, u0, dt);
fluid_sparse_array_add_source(v, v0, dt);
fluid_sparse_array_add_source(w, w0, dt);
SWAP(u0, u);
fluid_sparse_array_diffuse(1, u, u0, visc, dt);
SWAP(v0, v);
fluid_sparse_array_diffuse(2, v, v0, visc, dt);
SWAP(w0, w);
fluid_sparse_array_diffuse(3, w, w0, visc, dt);
fluid_sparse_array_project(u, v, w, u0, v0);
SWAP(u0, u);
SWAP(v0, v);
SWAP(w0, w);
fluid_sparse_array_advect(1, u, u0, u0, v0, w0, dt);
fluid_sparse_array_advect(2, v, v0, u0, v0, w0, dt);
fluid_sparse_array_advect(3, w, w0, u0, v0, w0, dt);
fluid_sparse_array_project(u, v, w, u0, v0);
} }
/** /**
* Projects a given array based on force vectors * Projects a given array based on force vectors
*/ */
LIBRARY_API void fluid_sparse_array_project(int N, float * u, float * v, float * w, float * p, float * div){ LIBRARY_API void fluid_sparse_array_project(float * u, float * v, float * w, float * p, float * div){
int i, j, k;
__m256 nVector = _mm256_set1_ps(SPARSE_ARRAY_ACTUAL_DATA_DIM);
__m256 constScalar = _mm256_set1_ps(-1.0/2.0);
__m256 zeroVec = _mm256_set1_ps(0);
__m256 vector, vector2, vector3;
//compute central difference approximation to populate for gauss-seidel relaxation
for(k=1; k<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); k++){
for(j=1; j<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); j++){
i = 1;
//
//lower
//
//first part
vector = _mm256_loadu_ps(&u[IX(i+1,j,k)]);
vector = _mm256_sub_ps(vector,_mm256_loadu_ps(&u[IX(i-1,j,k)]));
vector = _mm256_div_ps(vector,nVector);
//second part
vector2 = _mm256_loadu_ps(&v[IX(i,j+1,k)]);
vector2 = _mm256_sub_ps(vector2,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
vector2 = _mm256_div_ps(vector2,nVector);
//third part
vector3 = _mm256_loadu_ps(&w[IX(i,j,k+1)]);
vector3 = _mm256_sub_ps(vector3,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
vector3 = _mm256_div_ps(vector3,nVector);
//multiply and finalize
vector = _mm256_add_ps(vector,_mm256_add_ps(vector2,vector3));
vector = _mm256_mul_ps(vector,constScalar);
//store
_mm256_storeu_ps(&div[IX(i,j,k)],vector);
_mm256_storeu_ps(&p[IX(i,j,k)],zeroVec);
i = 9;
//
//upper
//
//first part
vector = _mm256_loadu_ps(&u[IX(i+1,j,k)]);
vector = _mm256_sub_ps(vector,_mm256_loadu_ps(&u[IX(i-1,j,k)]));
vector = _mm256_div_ps(vector,nVector);
//second part
vector2 = _mm256_loadu_ps(&v[IX(i,j+1,k)]);
vector2 = _mm256_sub_ps(vector2,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
vector2 = _mm256_div_ps(vector2,nVector);
//third part
vector3 = _mm256_loadu_ps(&w[IX(i,j,k+1)]);
vector3 = _mm256_sub_ps(vector3,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
vector3 = _mm256_div_ps(vector3,nVector);
//multiply and finalize
vector = _mm256_add_ps(vector,_mm256_add_ps(vector2,vector3));
vector = _mm256_mul_ps(vector,constScalar);
//store
_mm256_storeu_ps(&div[IX(i,j,k)],vector);
_mm256_storeu_ps(&p[IX(i,j,k)],zeroVec);
}
}
fluid_sparse_array_set_bnd(0, div);
fluid_sparse_array_set_bnd(0, p);
//solve system of equations
fluid_sparse_array_lin_solve_project(0, p, div);
//remove divergence from vector field
constScalar = _mm256_set1_ps(0.5f*SPARSE_ARRAY_ACTUAL_DATA_DIM);
for ( k=1 ; k<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2) ; k++ ) {
for ( j=1 ; j<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2) ; j++ ) {
//
//v
//
//lower
vector = _mm256_loadu_ps(&p[IX(1+1,j,k)]);
vector2 = _mm256_loadu_ps(&p[IX(1-1,j,k)]);
vector = _mm256_sub_ps(vector,vector2);
vector = _mm256_mul_ps(vector,constScalar);
vector = _mm256_sub_ps(_mm256_loadu_ps(&u[IX(1,j,k)]),vector);
_mm256_storeu_ps(&u[IX(1,j,k)],vector);
//upper
vector = _mm256_loadu_ps(&p[IX(9+1,j,k)]);
vector2 = _mm256_loadu_ps(&p[IX(9-1,j,k)]);
vector = _mm256_sub_ps(vector,vector2);
vector = _mm256_mul_ps(vector,constScalar);
vector = _mm256_sub_ps(_mm256_loadu_ps(&u[IX(9,j,k)]),vector);
_mm256_storeu_ps(&u[IX(9,j,k)],vector);
//
//v
//
//lower
vector = _mm256_loadu_ps(&p[IX(1,j+1,k)]);
vector2 = _mm256_loadu_ps(&p[IX(1,j-1,k)]);
vector = _mm256_sub_ps(vector,vector2);
vector = _mm256_mul_ps(vector,constScalar);
vector = _mm256_sub_ps(_mm256_loadu_ps(&v[IX(1,j,k)]),vector);
_mm256_storeu_ps(&v[IX(1,j,k)],vector);
//upper
vector = _mm256_loadu_ps(&p[IX(9,j+1,k)]);
vector2 = _mm256_loadu_ps(&p[IX(9,j-1,k)]);
vector = _mm256_sub_ps(vector,vector2);
vector = _mm256_mul_ps(vector,constScalar);
vector = _mm256_sub_ps(_mm256_loadu_ps(&v[IX(9,j,k)]),vector);
_mm256_storeu_ps(&v[IX(9,j,k)],vector);
//
//w
//
//lower
vector = _mm256_loadu_ps(&p[IX(1,j,k+1)]);
vector2 = _mm256_loadu_ps(&p[IX(1,j,k-1)]);
vector = _mm256_sub_ps(vector,vector2);
vector = _mm256_mul_ps(vector,constScalar);
vector = _mm256_sub_ps(_mm256_loadu_ps(&w[IX(1,j,k)]),vector);
_mm256_storeu_ps(&w[IX(1,j,k)],vector);
//upper
vector = _mm256_loadu_ps(&p[IX(9,j,k+1)]);
vector2 = _mm256_loadu_ps(&p[IX(9,j,k-1)]);
vector = _mm256_sub_ps(vector,vector2);
vector = _mm256_mul_ps(vector,constScalar);
vector = _mm256_sub_ps(_mm256_loadu_ps(&w[IX(9,j,k)]),vector);
_mm256_storeu_ps(&w[IX(9,j,k)],vector);
}
}
fluid_sparse_array_set_bnd(1, u);
fluid_sparse_array_set_bnd(2, v);
fluid_sparse_array_set_bnd(3, w);
} }
/** /**
@ -86,12 +290,75 @@ LIBRARY_API void fluid_sparse_array_project(int N, float * u, float * v, float *
* @param x0 The array containing the first order derivatives * @param x0 The array containing the first order derivatives
* @param a * @param a
*/ */
LIBRARY_API void fluid_sparse_array_lin_solve(int b, float * x, float * x0){ LIBRARY_API void fluid_sparse_array_lin_solve_diffuse(int b, float * x, float * x0){
int i, j, k, l, m; int i, j, k, l, m;
int a = 0; int a = FLUID_SIM_TIMESTEP * DIFFUSE_CONSTANT * SPARSE_ARRAY_ACTUAL_DATA_DIM * SPARSE_ARRAY_ACTUAL_DATA_DIM * SPARSE_ARRAY_ACTUAL_DATA_DIM;
int c = 0; int c = 1+6*a;
__m256 aScalar = _mm256_set1_ps(0);
__m256 cScalar = _mm256_set1_ps(0); __m256 aScalar = _mm256_set1_ps(a);
__m256 cScalar = _mm256_set1_ps(c);
int vectorSize = 8;
// iterate the solver
for(l = 0; l < LINEARSOLVERTIMES; l++){
// update for each cell
for(k = (SPARSE_ARRAY_BORDER_SIZE / 2); k < SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); k++){
for(j = (SPARSE_ARRAY_BORDER_SIZE / 2); j < SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); j++){
int n = 0;
//solve as much as possible vectorized
for(i = (SPARSE_ARRAY_BORDER_SIZE / 2); i < SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); i = i + vectorSize){
__m256 vector = _mm256_loadu_ps(&x[GVI(i-1,j,k)]);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[GVI(i+1,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[GVI(i,j-1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[GVI(i,j+1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[GVI(i,j,k-1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[GVI(i,j,k+1)]));
vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[GVI(i,j,k)]));
vector = _mm256_div_ps(vector,cScalar);
_mm256_storeu_ps(&x[GVI(i,j,k)],vector);
}
//If there is any leftover, perform manual solving
if(i > SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2)){
for(i = i - vectorSize; i < (SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2)); i++){
/*
want to solve the second derivative of the grid
x0 stores the first derivative
start with x being 0 in all locations
then take the difference along x0 and store it in x
this is approximating the derivative
equation looks something like
2nd deriv = x0 +
*/
//phi is the scalar potential
//want to solve the laplacian
//laplacian is derivative of phi over the gid spacing squared along each dimension
//the derivative of phi is just phi(x+1) - 2 * phi(x) + phi(x-1) along each axis "x"
//we are grabbing phi from the x array
x[GVI(i,j,k)] = (x0[GVI(i,j,k)] + a*(x[GVI(i-1,j,k)]+x[GVI(i+1,j,k)]+x[GVI(i,j-1,k)]+x[GVI(i,j+1,k)]+x[GVI(i,j,k-1)]+x[GVI(i,j,k+1)]))/c;
}
}
}
}
fluid_sparse_array_set_bnd(b, x);
}
}
/**
* Solves a linear system of equations in a vectorized manner
* @param b The axis to set the bounds along
* @param x The array that will contain the solved equations
* @param x0 The array containing the first order derivatives
*/
LIBRARY_API void fluid_sparse_array_lin_solve_project(int b, float * x, float * x0){
int i, j, k, l, m;
int a = 1;
int c = 6;
__m256 aScalar = _mm256_set1_ps(a);
__m256 cScalar = _mm256_set1_ps(c);
int vectorSize = 8; int vectorSize = 8;
// iterate the solver // iterate the solver
for(l = 0; l < LINEARSOLVERTIMES; l++){ for(l = 0; l < LINEARSOLVERTIMES; l++){