add some fluid sim work

2024-12-06 12:01:43 -05:00 · 2024-12-06 12:01:43 -05:00 · bd80b09d41
commit bd80b09d41
parent e6e469bec1
3 changed files with 303 additions and 20 deletions
--- a/src/main/c/includes/fluid/chunk.h
+++ b/src/main/c/includes/fluid/chunk.h
@ -77,9 +77,17 @@ typedef struct {
 #define SPARSE_ARRAY_REAL_DATA_SIZE (SPARSE_ARRAY_CHUNK_DIM * MAIN_ARRAY_DIM)
 /**
- * The size of the dimension of the memory of the sparse array
+ * Size of the cells we care about in the sparse array
 */
-#define SPARSE_ARRAY_RAW_DIM (SPARSE_ARRAY_REAL_DATA_SIZE + SPARSE_ARRAY_BORDER_SIZE)
+#define SPARSE_ARRAY_ACTUAL_DATA_DIM (SPARSE_ARRAY_REAL_DATA_SIZE + SPARSE_ARRAY_BORDER_SIZE)
 /**
 * The size of the dimension of the memory of the sparse array
 * While the SPARSE_ARRAY_ACTUAL_DATA_DIM may come out to 114, it will be more efficient CPU-instruction wise
 * if we round that up to 128+2
 * That way we can call highly vectorized functions (ie avx128 instead of avx64+avx32+avx16+2)
 */
 #define SPARSE_ARRAY_RAW_DIM (128 + SPARSE_ARRAY_BORDER_SIZE)
 /**
 * The size of a sparse array in number of elements
@ -94,7 +102,7 @@ typedef struct {
 /**
 * The size of the sparse array in spatial units
 */
-#define SPARSE_ARRAY_SPATIAL_SIZE (SPARSE_ARRAY_RAW_DIM * SPATIAL_UNIT)
+#define SPARSE_ARRAY_SPATIAL_SIZE (SPARSE_ARRAY_ACTUAL_DATA_DIM * SPATIAL_UNIT)
 /**
 * A set of sparse matricies for simulating fluids
--- a/src/main/c/includes/fluid/sparsesimulator.h
+++ b/src/main/c/includes/fluid/sparsesimulator.h
@ -32,12 +32,12 @@ LIBRARY_API void fluid_sparse_array_diffuse(int b, float * x, float * x0, float
 /**
 * Advects an array
 */
-LIBRARY_API void fluid_sparse_array_advect(int N, int b, float * d, float * d0, float * u, float * v, float * w, float dt);
+LIBRARY_API void fluid_sparse_array_advect(int b, float * d, float * d0, float * u, float * v, float * w, float dt);
 /**
 * Projects an array
 */
-LIBRARY_API void fluid_sparse_array_project(int N, float * u, float * v, float * w, float * p, float * div);
+LIBRARY_API void fluid_sparse_array_project(float * u, float * v, float * w, float * p, float * div);
 /**
 * Sets the bounds of the simulation
@ -61,7 +61,7 @@ LIBRARY_API void fluid_sparse_array_dens_step(float * x, float * x0, float * u,
 /**
 * Performs the velocity step
 */
-LIBRARY_API void fluid_sparse_array_vel_step(int N, float * u, float * v, float * w, float * u0, float * v0, float * w0, float visc, float dt);
+LIBRARY_API void fluid_sparse_array_vel_step(float * u, float * v, float * w, float * u0, float * v0, float * w0, float visc, float dt);
 /**
 * Solves a linear system of equations in a vectorized manner
@ -69,6 +69,14 @@ LIBRARY_API void fluid_sparse_array_vel_step(int N, float * u, float * v, float
 * @param x The array that will contain the solved equations
 * @param x0 The array containing the first order derivatives
 */
-LIBRARY_API void fluid_sparse_array_lin_solve(int b, float * x, float * x0);
+LIBRARY_API void fluid_sparse_array_lin_solve_diffuse(int b, float * x, float * x0);
 /**
 * Solves a linear system of equations in a vectorized manner
 * @param b The axis to set the bounds along
 * @param x The array that will contain the solved equations
 * @param x0 The array containing the first order derivatives
 */
 LIBRARY_API void fluid_sparse_array_lin_solve_project(int b, float * x, float * x0);
 #endif
--- a/src/main/c/src/fluid/sparsesimulator.c
+++ b/src/main/c/src/fluid/sparsesimulator.c
@ -11,11 +11,37 @@
 #define LINEARSOLVERTIMES 10
 #define FLUID_SIM_TIMESTEP 0.01
 #define DIFFUSE_CONSTANT 0.00001
 #define VISCOSITY_CONSTANT 0.00001
 /**
 * Simulates a sparse array
 */
 LIBRARY_API int fluid_sparse_array_simulate(SparseChunkArray * array, float dt){
-    
+    //velocity step
    fluid_sparse_array_vel_step(
        array->u,
        array->v,
        array->w,
        array->u0,
        array->v0,
        array->w0,
        VISCOSITY_CONSTANT,
        FLUID_SIM_TIMESTEP
    );
    //density step
    fluid_sparse_array_dens_step(
        array->d,
        array->d0,
        array->u,
        array->v,
        array->w,
        DIFFUSE_CONSTANT,
        FLUID_SIM_TIMESTEP
    );
 }
@ -39,14 +65,51 @@ LIBRARY_API void fluid_sparse_array_add_source(float * x, float * s, float dt){
 * Diffuses a given array by a diffusion constant
 */
 LIBRARY_API void fluid_sparse_array_diffuse(int b, float * x, float * x0, float diff, float dt){
-    float a=dt*diff*SPARSE_ARRAY_SPATIAL_SIZE*SPARSE_ARRAY_SPATIAL_SIZE*SPARSE_ARRAY_SPATIAL_SIZE;
+    fluid_sparse_array_lin_solve_diffuse(b, x, x0);
    fluid_sparse_array_lin_solve(b, x, x0);
 }
 /**
 * Advects a given array based on the force vectors in the simulation
 */
-LIBRARY_API void fluid_sparse_array_advect(int N, int b, float * d, float * d0, float * u, float * v, float * w, float dt){
+LIBRARY_API void fluid_sparse_array_advect(int b, float * d, float * d0, float * u, float * v, float * w, float dt){
    int i, j, k, i0, j0, k0, i1, j1, k1;
    float x, y, z, s0, t0, s1, t1, u1, u0, dtx,dty,dtz;
    dtx=dty=dtz=dt*SPARSE_ARRAY_ACTUAL_DATA_DIM;
    for(k=(SPARSE_ARRAY_BORDER_SIZE / 2); k<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); k++){
        for(j=(SPARSE_ARRAY_BORDER_SIZE / 2); j<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); j++){
            for(i=(SPARSE_ARRAY_BORDER_SIZE / 2); i<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); i++){
                x = i-dtx*u[IX(i,j,k)]; y = j-dty*v[IX(i,j,k)]; z = k-dtz*w[IX(i,j,k)];
                if (x<0.5f) x=0.5f; if (x>SPARSE_ARRAY_RAW_DIM+0.5f) x=SPARSE_ARRAY_RAW_DIM+0.5f; i0=(int)x; i1=i0+1;
                if (y<0.5f) y=0.5f; if (y>SPARSE_ARRAY_RAW_DIM+0.5f) y=SPARSE_ARRAY_RAW_DIM+0.5f; j0=(int)y; j1=j0+1;
                if (z<0.5f) z=0.5f; if (z>SPARSE_ARRAY_RAW_DIM+0.5f) z=SPARSE_ARRAY_RAW_DIM+0.5f; k0=(int)z; k1=k0+1;
                s1 = x-i0; s0 = 1-s1; t1 = y-j0; t0 = 1-t1; u1 = z-k0; u0 = 1-u1;
                if(i0 >= SPARSE_ARRAY_RAW_DIM){
                    i0 = SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2);
                }
                if(j0 >= SPARSE_ARRAY_RAW_DIM){
                    j0 = SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2);
                }
                if(k0 >= SPARSE_ARRAY_RAW_DIM){
                    k0 = SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2);
                }
                if(i1 >= SPARSE_ARRAY_RAW_DIM){
                    i1 = SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2);
                }
                if(j1 >= SPARSE_ARRAY_RAW_DIM){
                    j1 = SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2);
                }
                if(k1 >= SPARSE_ARRAY_RAW_DIM){
                    k1 = SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2);
                }
                d[IX(i,j,k)] = s0*(t0*u0*d0[IX(i0,j0,k0)]+t1*u0*d0[IX(i0,j1,k0)]+t0*u1*d0[IX(i0,j0,k1)]+t1*u1*d0[IX(i0,j1,k1)])+
                    s1*(t0*u0*d0[IX(i1,j0,k0)]+t1*u0*d0[IX(i1,j1,k0)]+t0*u1*d0[IX(i1,j0,k1)]+t1*u1*d0[IX(i1,j1,k1)]);
            }
        }
    }
    fluid_sparse_array_set_bnd(b, d);
 }
 /**
@ -63,20 +126,161 @@ LIBRARY_API void fluid_sparse_array_dens_step(float * x, float * x0, float * u,
    fluid_sparse_array_add_source(x, x0, dt);
    SWAP(x0, x);
    fluid_sparse_array_diffuse(0, x, x0, diff, dt);
-    // SWAP(x0, x);
+    SWAP(x0, x);
-    // fluid_sparse_array_advect(N, 0, x, x0, u, v, w, dt);
+    fluid_sparse_array_advect(0, x, x0, u, v, w, dt);
 }
 /**
 * The main velocity step function
 */
-LIBRARY_API void fluid_sparse_array_vel_step(int N, float * u, float * v, float * w, float * u0, float * v0, float * w0, float visc, float dt){
+LIBRARY_API void fluid_sparse_array_vel_step(float * u, float * v, float * w, float * u0, float * v0, float * w0, float visc, float dt){
    fluid_sparse_array_add_source(u, u0, dt);
    fluid_sparse_array_add_source(v, v0, dt);
    fluid_sparse_array_add_source(w, w0, dt);
    SWAP(u0, u);
    fluid_sparse_array_diffuse(1, u, u0, visc, dt);
    SWAP(v0, v);
    fluid_sparse_array_diffuse(2, v, v0, visc, dt);
    SWAP(w0, w);
    fluid_sparse_array_diffuse(3, w, w0, visc, dt);
    fluid_sparse_array_project(u, v, w, u0, v0);
    SWAP(u0, u);
    SWAP(v0, v);
    SWAP(w0, w);
    fluid_sparse_array_advect(1, u, u0, u0, v0, w0, dt);
    fluid_sparse_array_advect(2, v, v0, u0, v0, w0, dt);
    fluid_sparse_array_advect(3, w, w0, u0, v0, w0, dt);
    fluid_sparse_array_project(u, v, w, u0, v0);
 }
 /**
 * Projects a given array based on force vectors
 */
-LIBRARY_API void fluid_sparse_array_project(int N, float * u, float * v, float * w, float * p, float * div){
+LIBRARY_API void fluid_sparse_array_project(float * u, float * v, float * w, float * p, float * div){
    int i, j, k;
    __m256 nVector = _mm256_set1_ps(SPARSE_ARRAY_ACTUAL_DATA_DIM);
    __m256 constScalar = _mm256_set1_ps(-1.0/2.0);
    __m256 zeroVec = _mm256_set1_ps(0);
    __m256 vector, vector2, vector3;
    //compute central difference approximation to populate for gauss-seidel relaxation
    for(k=1; k<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); k++){
        for(j=1; j<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); j++){
            i = 1;
            //
            //lower
            //
            //first part
            vector = _mm256_loadu_ps(&u[IX(i+1,j,k)]);
            vector = _mm256_sub_ps(vector,_mm256_loadu_ps(&u[IX(i-1,j,k)]));
            vector = _mm256_div_ps(vector,nVector);
            //second part
            vector2 = _mm256_loadu_ps(&v[IX(i,j+1,k)]);
            vector2 = _mm256_sub_ps(vector2,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
            vector2 = _mm256_div_ps(vector2,nVector);
            //third part
            vector3 = _mm256_loadu_ps(&w[IX(i,j,k+1)]);
            vector3 = _mm256_sub_ps(vector3,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
            vector3 = _mm256_div_ps(vector3,nVector);
            //multiply and finalize
            vector = _mm256_add_ps(vector,_mm256_add_ps(vector2,vector3));
            vector = _mm256_mul_ps(vector,constScalar);
            //store
            _mm256_storeu_ps(&div[IX(i,j,k)],vector);
            _mm256_storeu_ps(&p[IX(i,j,k)],zeroVec);
            i = 9;
            //
            //upper
            //
            //first part
            vector = _mm256_loadu_ps(&u[IX(i+1,j,k)]);
            vector = _mm256_sub_ps(vector,_mm256_loadu_ps(&u[IX(i-1,j,k)]));
            vector = _mm256_div_ps(vector,nVector);
            //second part
            vector2 = _mm256_loadu_ps(&v[IX(i,j+1,k)]);
            vector2 = _mm256_sub_ps(vector2,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
            vector2 = _mm256_div_ps(vector2,nVector);
            //third part
            vector3 = _mm256_loadu_ps(&w[IX(i,j,k+1)]);
            vector3 = _mm256_sub_ps(vector3,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
            vector3 = _mm256_div_ps(vector3,nVector);
            //multiply and finalize
            vector = _mm256_add_ps(vector,_mm256_add_ps(vector2,vector3));
            vector = _mm256_mul_ps(vector,constScalar);
            //store
            _mm256_storeu_ps(&div[IX(i,j,k)],vector);
            _mm256_storeu_ps(&p[IX(i,j,k)],zeroVec);
        }
    }
    fluid_sparse_array_set_bnd(0, div);
    fluid_sparse_array_set_bnd(0, p);
    //solve system of equations
    fluid_sparse_array_lin_solve_project(0, p, div);
    //remove divergence from vector field
    constScalar = _mm256_set1_ps(0.5f*SPARSE_ARRAY_ACTUAL_DATA_DIM);
    for ( k=1 ; k<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2) ; k++ ) {
        for ( j=1 ; j<SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2) ; j++ ) {
            //
            //v
            //
            //lower
            vector =  _mm256_loadu_ps(&p[IX(1+1,j,k)]);
            vector2 = _mm256_loadu_ps(&p[IX(1-1,j,k)]);
            vector =  _mm256_sub_ps(vector,vector2);
            vector = _mm256_mul_ps(vector,constScalar);
            vector = _mm256_sub_ps(_mm256_loadu_ps(&u[IX(1,j,k)]),vector);
            _mm256_storeu_ps(&u[IX(1,j,k)],vector);
            //upper
            vector =  _mm256_loadu_ps(&p[IX(9+1,j,k)]);
            vector2 = _mm256_loadu_ps(&p[IX(9-1,j,k)]);
            vector =  _mm256_sub_ps(vector,vector2);
            vector = _mm256_mul_ps(vector,constScalar);
            vector = _mm256_sub_ps(_mm256_loadu_ps(&u[IX(9,j,k)]),vector);
            _mm256_storeu_ps(&u[IX(9,j,k)],vector);
            //
            //v
            //
            //lower
            vector =  _mm256_loadu_ps(&p[IX(1,j+1,k)]);
            vector2 = _mm256_loadu_ps(&p[IX(1,j-1,k)]);
            vector =  _mm256_sub_ps(vector,vector2);
            vector = _mm256_mul_ps(vector,constScalar);
            vector = _mm256_sub_ps(_mm256_loadu_ps(&v[IX(1,j,k)]),vector);
            _mm256_storeu_ps(&v[IX(1,j,k)],vector);
            //upper
            vector =  _mm256_loadu_ps(&p[IX(9,j+1,k)]);
            vector2 = _mm256_loadu_ps(&p[IX(9,j-1,k)]);
            vector =  _mm256_sub_ps(vector,vector2);
            vector = _mm256_mul_ps(vector,constScalar);
            vector = _mm256_sub_ps(_mm256_loadu_ps(&v[IX(9,j,k)]),vector);
            _mm256_storeu_ps(&v[IX(9,j,k)],vector);
            //
            //w
            //
            //lower
            vector =  _mm256_loadu_ps(&p[IX(1,j,k+1)]);
            vector2 = _mm256_loadu_ps(&p[IX(1,j,k-1)]);
            vector =  _mm256_sub_ps(vector,vector2);
            vector = _mm256_mul_ps(vector,constScalar);
            vector = _mm256_sub_ps(_mm256_loadu_ps(&w[IX(1,j,k)]),vector);
            _mm256_storeu_ps(&w[IX(1,j,k)],vector);
            //upper
            vector =  _mm256_loadu_ps(&p[IX(9,j,k+1)]);
            vector2 = _mm256_loadu_ps(&p[IX(9,j,k-1)]);
            vector =  _mm256_sub_ps(vector,vector2);
            vector = _mm256_mul_ps(vector,constScalar);
            vector = _mm256_sub_ps(_mm256_loadu_ps(&w[IX(9,j,k)]),vector);
            _mm256_storeu_ps(&w[IX(9,j,k)],vector);
        }
    }
    fluid_sparse_array_set_bnd(1, u);
    fluid_sparse_array_set_bnd(2, v);
    fluid_sparse_array_set_bnd(3, w);
 }
 /**
@ -86,12 +290,75 @@ LIBRARY_API void fluid_sparse_array_project(int N, float * u, float * v, float *
 * @param x0 The array containing the first order derivatives
 * @param a 
 */
-LIBRARY_API void fluid_sparse_array_lin_solve(int b, float * x, float * x0){
+LIBRARY_API void fluid_sparse_array_lin_solve_diffuse(int b, float * x, float * x0){
    int i, j, k, l, m;
-    int a = 0;
+    int a = FLUID_SIM_TIMESTEP * DIFFUSE_CONSTANT * SPARSE_ARRAY_ACTUAL_DATA_DIM * SPARSE_ARRAY_ACTUAL_DATA_DIM * SPARSE_ARRAY_ACTUAL_DATA_DIM;
-    int c = 0;
+    int c = 1+6*a;
-    __m256 aScalar = _mm256_set1_ps(0);
+    
-    __m256 cScalar = _mm256_set1_ps(0);
+    __m256 aScalar = _mm256_set1_ps(a);
    __m256 cScalar = _mm256_set1_ps(c);
    int vectorSize = 8;
    // iterate the solver
    for(l = 0; l < LINEARSOLVERTIMES; l++){
        // update for each cell
        for(k = (SPARSE_ARRAY_BORDER_SIZE / 2); k < SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); k++){
            for(j = (SPARSE_ARRAY_BORDER_SIZE / 2); j < SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); j++){
                int n = 0;
                //solve as much as possible vectorized
                for(i = (SPARSE_ARRAY_BORDER_SIZE / 2); i < SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2); i = i + vectorSize){
                    __m256 vector = _mm256_loadu_ps(&x[GVI(i-1,j,k)]);
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[GVI(i+1,j,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[GVI(i,j-1,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[GVI(i,j+1,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[GVI(i,j,k-1)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[GVI(i,j,k+1)]));
                    vector = _mm256_mul_ps(vector,aScalar);
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[GVI(i,j,k)]));
                    vector = _mm256_div_ps(vector,cScalar);
                    _mm256_storeu_ps(&x[GVI(i,j,k)],vector);
                }
                //If there is any leftover, perform manual solving
                if(i > SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2)){
                    for(i = i - vectorSize; i < (SPARSE_ARRAY_RAW_DIM - (SPARSE_ARRAY_BORDER_SIZE / 2)); i++){
                        /*
                        want to solve the second derivative of the grid
                        x0 stores the first derivative
                        start with x being 0 in all locations
                        then take the difference along x0 and store it in x
                        this is approximating the derivative
                        equation looks something like
                        2nd deriv =   x0    +    
                        */
                        //phi is the scalar potential
                        //want to solve the laplacian
                        //laplacian is derivative of phi over the gid spacing squared along each dimension
                        //the derivative of phi is just phi(x+1) - 2 * phi(x) + phi(x-1) along each axis "x"
                        //we are grabbing phi from the x array
                        x[GVI(i,j,k)] = (x0[GVI(i,j,k)] + a*(x[GVI(i-1,j,k)]+x[GVI(i+1,j,k)]+x[GVI(i,j-1,k)]+x[GVI(i,j+1,k)]+x[GVI(i,j,k-1)]+x[GVI(i,j,k+1)]))/c;
                    }
                }
            }
        }
        fluid_sparse_array_set_bnd(b, x);
    }
 }
 /**
 * Solves a linear system of equations in a vectorized manner
 * @param b The axis to set the bounds along
 * @param x The array that will contain the solved equations
 * @param x0 The array containing the first order derivatives
 */
 LIBRARY_API void fluid_sparse_array_lin_solve_project(int b, float * x, float * x0){
    int i, j, k, l, m;
    int a = 1;
    int c = 6;
    __m256 aScalar = _mm256_set1_ps(a);
    __m256 cScalar = _mm256_set1_ps(c);
    int vectorSize = 8;
    // iterate the solver
    for(l = 0; l < LINEARSOLVERTIMES; l++){