move iterator inside diffuse funcs

This commit is contained in:
austin 2024-12-11 11:44:45 -05:00
parent 58a9bafa4d
commit 8aab5319b5
5 changed files with 146 additions and 117 deletions

View File

@ -7,11 +7,6 @@
*/ */
#define FLUID_GRID2_LINEARSOLVERTIMES 2 #define FLUID_GRID2_LINEARSOLVERTIMES 2
/**
* The number of times to relax the vector diffusion solver
*/
#define FLUID_GRID2_VECTOR_DIFFUSE_TIMES 2
/** /**
* Width of a single grid cell * Width of a single grid cell
*/ */

View File

@ -52,30 +52,35 @@ LIBRARY_API void fluid_grid2_solveDiffuseDensity(
__m256 aScalar = _mm256_set1_ps(a); __m256 aScalar = _mm256_set1_ps(a);
__m256 cScalar = _mm256_set1_ps(c); __m256 cScalar = _mm256_set1_ps(c);
//transform u direction for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
for(k=1; k<DIM-1; k++){ //iterate
for(j=1; j<DIM-1; j++){ for(k=1; k<DIM-1; k++){
int n = 0; for(j=1; j<DIM-1; j++){
//solve as much as possible vectorized int n = 0;
for(i = 1; i < DIM-1; i=i+8){ //solve as much as possible vectorized
__m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]); for(i = 1; i < DIM-1; i=i+8){
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)])); __m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j-1,k)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j+1,k)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j-1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k-1)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j+1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k+1)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k-1)]));
vector = _mm256_mul_ps(vector,aScalar); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k+1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[IX(i,j,k)])); vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_div_ps(vector,cScalar); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[IX(i,j,k)]));
_mm256_storeu_ps(&x[IX(i,j,k)],vector); vector = _mm256_div_ps(vector,cScalar);
} _mm256_storeu_ps(&x[IX(i,j,k)],vector);
//If there is any leftover, perform manual solving }
if(i>DIM-1){ //If there is any leftover, perform manual solving
for(i=i-8; i < DIM-1; i++){ if(i>DIM-1){
x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c; for(i=i-8; i < DIM-1; i++){
x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
}
} }
} }
} }
//set bounds
fluid_grid2_set_bounds(0,x);
} }
} }

View File

@ -81,17 +81,7 @@ LIBRARY_API void fluid_grid2_simulate(
fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0); fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0);
//solve vector diffusion //solve vector diffusion
for(int l = 0; l < FLUID_GRID2_VECTOR_DIFFUSE_TIMES; l++){ fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,timestep);
//solve vector diffusion
fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,timestep);
//update array for vectors
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,currentChunk->u[CENTER_LOC]);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_V,currentChunk->v[CENTER_LOC]);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_W,currentChunk->w[CENTER_LOC]);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,currentChunk->u0[CENTER_LOC]);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_V,currentChunk->v0[CENTER_LOC]);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_W,currentChunk->w0[CENTER_LOC]);
}
//setup projection //setup projection
fluid_grid2_setupProjection(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,timestep); fluid_grid2_setupProjection(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,timestep);
@ -182,10 +172,8 @@ LIBRARY_API void fluid_grid2_simulate(
//swap vector fields //swap vector fields
fluid_grid2_flip_arrays(currentChunk->d,currentChunk->d0); fluid_grid2_flip_arrays(currentChunk->d,currentChunk->d0);
//diffuse density //diffuse density
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){ fluid_grid2_solveDiffuseDensity(currentChunk->d,currentChunk->d0,timestep);
fluid_grid2_solveDiffuseDensity(currentChunk->d,currentChunk->d0,timestep); fluid_grid2_set_bounds(0,currentChunk->d[CENTER_LOC]);
fluid_grid2_set_bounds(0,currentChunk->d[CENTER_LOC]);
}
//swap all density arrays //swap all density arrays
//swap vector fields //swap vector fields
fluid_grid2_flip_arrays(currentChunk->d,currentChunk->d0); fluid_grid2_flip_arrays(currentChunk->d,currentChunk->d0);
@ -242,6 +230,22 @@ LIBRARY_API void fluid_grid2_simulate(
} }
/**
* Saves a step of the simulation to a file
*/
static inline void fluid_grid2_saveStep(float * values, const char * name){ static inline void fluid_grid2_saveStep(float * values, const char * name){
if(SAVE_STEPS){ if(SAVE_STEPS){
FILE *fp; FILE *fp;
@ -346,8 +350,19 @@ static inline void fluid_grid2_apply_bounds_mask(float * realArr, float * bounds
_mm256_storeu_ps(&realArr[IX(x,y,z)],finalVec); _mm256_storeu_ps(&realArr[IX(x,y,z)],finalVec);
//middle part
x = 8;
//border part
maskedBounds = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
//real part
realVal = _mm256_loadu_ps(&realArr[IX(x,y,z)]);
invertedMask = _mm256_loadu_ps(&fluid_grid2_border_mask_inverted[IX(x,y,z)]);
realPart = _mm256_mul_ps(realVal,invertedMask);
finalVec = _mm256_add_ps(realPart,maskedBounds);
_mm256_storeu_ps(&realArr[IX(x,y,z)],finalVec);
//upper part //upper part
x = 2; x = 10;
//border part //border part
maskedBounds = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]); maskedBounds = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
//real part //real part
@ -382,19 +397,29 @@ static inline void fluid_grid2_apply_neighbors(Chunk * chunk){
*/ */
static inline void fluid_grid2_populate_masked_arr(float * sourceArr, float * workingArr){ static inline void fluid_grid2_populate_masked_arr(float * sourceArr, float * workingArr){
__m256 arrVal, maskVal, masked; __m256 arrVal, maskVal, masked;
int x;
for(int z = 0; z < 18; z++){ for(int z = 0; z < 18; z++){
for(int y = 0; y < 18; y++){ for(int y = 0; y < 18; y++){
//lower part //lower part
arrVal = _mm256_loadu_ps(&sourceArr[IX(0,y,z)]); x = 0;
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(0,y,z)]); arrVal = _mm256_loadu_ps(&sourceArr[IX(x,y,z)]);
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
masked = _mm256_mul_ps(arrVal,maskVal); masked = _mm256_mul_ps(arrVal,maskVal);
_mm256_storeu_ps(&workingArr[IX(0,y,z)],masked); _mm256_storeu_ps(&workingArr[IX(x,y,z)],masked);
//middle part
x = 8;
arrVal = _mm256_loadu_ps(&sourceArr[IX(x,y,z)]);
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
masked = _mm256_mul_ps(arrVal,maskVal);
_mm256_storeu_ps(&workingArr[IX(x,y,z)],masked);
//upper part //upper part
arrVal = _mm256_loadu_ps(&sourceArr[IX(2,y,z)]); x = 10;
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(0,y,z)]); arrVal = _mm256_loadu_ps(&sourceArr[IX(x,y,z)]);
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
masked = _mm256_mul_ps(arrVal,maskVal); masked = _mm256_mul_ps(arrVal,maskVal);
_mm256_storeu_ps(&workingArr[IX(0,y,z)],masked); _mm256_storeu_ps(&workingArr[IX(x,y,z)],masked);
} }
} }
} }

View File

@ -48,8 +48,8 @@ LIBRARY_API void fluid_grid2_solveVectorDiffuse (
float ** jrw0, float ** jrw0,
float dt float dt
){ ){
float a=dt*FLUID_GRID2_VISCOSITY_CONSTANT/(FLUID_GRID2_H*FLUID_GRID2_H); float a = dt*FLUID_GRID2_VISCOSITY_CONSTANT/(FLUID_GRID2_H*FLUID_GRID2_H);
float c=1+6*a; float c = 1+6*a;
int i, j, k, l, m; int i, j, k, l, m;
float * u = GET_ARR_RAW(jru,CENTER_LOC); float * u = GET_ARR_RAW(jru,CENTER_LOC);
float * v = GET_ARR_RAW(jrv,CENTER_LOC); float * v = GET_ARR_RAW(jrv,CENTER_LOC);
@ -61,82 +61,86 @@ LIBRARY_API void fluid_grid2_solveVectorDiffuse (
__m256 aScalar = _mm256_set1_ps(a); __m256 aScalar = _mm256_set1_ps(a);
__m256 cScalar = _mm256_set1_ps(c); __m256 cScalar = _mm256_set1_ps(c);
//transform u direction for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
for(k=1; k<DIM-1; k++){ //transform u direction
for(j=1; j<DIM-1; j++){ for(k=1; k<DIM-1; k++){
int n = 0; for(j=1; j<DIM-1; j++){
//solve as much as possible vectorized //solve as much as possible vectorized
for(i = 1; i < DIM-1; i=i+8){ for(i = 1; i < DIM-1; i=i+8){
__m256 vector = _mm256_loadu_ps(&u[IX(i-1,j,k)]); __m256 vector = _mm256_loadu_ps(&u[IX(i-1,j,k)]);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i+1,j,k)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i+1,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j-1,k)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j-1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j+1,k)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j+1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k-1)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k-1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k+1)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k+1)]));
vector = _mm256_mul_ps(vector,aScalar); vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u0[IX(i,j,k)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u0[IX(i,j,k)]));
vector = _mm256_div_ps(vector,cScalar); vector = _mm256_div_ps(vector,cScalar);
_mm256_storeu_ps(&u[IX(i,j,k)],vector); _mm256_storeu_ps(&u[IX(i,j,k)],vector);
} }
//If there is any leftover, perform manual solving //If there is any leftover, perform manual solving
if(i>DIM-1){ if(i>DIM-1){
for(i=i-8; i < DIM-1; i++){ for(i=i-8; i < DIM-1; i++){
u[IX(i,j,k)] = (u0[IX(i,j,k)] + a*(u[IX(i-1,j,k)]+u[IX(i+1,j,k)]+u[IX(i,j-1,k)]+u[IX(i,j+1,k)]+u[IX(i,j,k-1)]+u[IX(i,j,k+1)]))/c; u[IX(i,j,k)] = (u0[IX(i,j,k)] + a*(u[IX(i-1,j,k)]+u[IX(i+1,j,k)]+u[IX(i,j-1,k)]+u[IX(i,j+1,k)]+u[IX(i,j,k-1)]+u[IX(i,j,k+1)]))/c;
}
} }
} }
} }
}
//transform v direction //transform v direction
for(k=1; k<DIM-1; k++){ for(k=1; k<DIM-1; k++){
for(j=1; j<DIM-1; j++){ for(j=1; j<DIM-1; j++){
int n = 0; //solve as much as possible vectorized
//solve as much as possible vectorized for(i = 1; i < DIM-1; i=i+8){
for(i = 1; i < DIM-1; i=i+8){ __m256 vector = _mm256_loadu_ps(&v[IX(i-1,j,k)]);
__m256 vector = _mm256_loadu_ps(&v[IX(i-1,j,k)]); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i+1,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i+1,j,k)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j-1,k)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j+1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j+1,k)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k-1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k-1)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k+1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k+1)])); vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_mul_ps(vector,aScalar); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v0[IX(i,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v0[IX(i,j,k)])); vector = _mm256_div_ps(vector,cScalar);
vector = _mm256_div_ps(vector,cScalar); _mm256_storeu_ps(&v[IX(i,j,k)],vector);
_mm256_storeu_ps(&v[IX(i,j,k)],vector); }
} //If there is any leftover, perform manual solving
//If there is any leftover, perform manual solving if(i>DIM-1){
if(i>DIM-1){ for(i=i-8; i < DIM-1; i++){
for(i=i-8; i < DIM-1; i++){ v[IX(i,j,k)] = (v0[IX(i,j,k)] + a*(v[IX(i-1,j,k)]+v[IX(i+1,j,k)]+v[IX(i,j-1,k)]+v[IX(i,j+1,k)]+v[IX(i,j,k-1)]+v[IX(i,j,k+1)]))/c;
v[IX(i,j,k)] = (v0[IX(i,j,k)] + a*(v[IX(i-1,j,k)]+v[IX(i+1,j,k)]+v[IX(i,j-1,k)]+v[IX(i,j+1,k)]+v[IX(i,j,k-1)]+v[IX(i,j,k+1)]))/c; }
} }
} }
} }
}
//transform w direction //transform w direction
for(k=1; k<DIM-1; k++){ for(k=1; k<DIM-1; k++){
for(j=1; j<DIM-1; j++){ for(j=1; j<DIM-1; j++){
int n = 0; //solve as much as possible vectorized
//solve as much as possible vectorized for(i = 1; i < DIM-1; i=i+8){
for(i = 1; i < DIM-1; i=i+8){ __m256 vector = _mm256_loadu_ps(&w[IX(i-1,j,k)]);
__m256 vector = _mm256_loadu_ps(&w[IX(i-1,j,k)]); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i+1,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i+1,j,k)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j-1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j-1,k)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j+1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j+1,k)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k-1)])); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k+1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k+1)])); vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_mul_ps(vector,aScalar); vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w0[IX(i,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w0[IX(i,j,k)])); vector = _mm256_div_ps(vector,cScalar);
vector = _mm256_div_ps(vector,cScalar); _mm256_storeu_ps(&w[IX(i,j,k)],vector);
_mm256_storeu_ps(&w[IX(i,j,k)],vector); }
} //If there is any leftover, perform manual solving
//If there is any leftover, perform manual solving if(i>DIM-1){
if(i>DIM-1){ for(i=i-8; i < DIM-1; i++){
for(i=i-8; i < DIM-1; i++){ w[IX(i,j,k)] = (w0[IX(i,j,k)] + a*(w[IX(i-1,j,k)]+w[IX(i+1,j,k)]+w[IX(i,j-1,k)]+w[IX(i,j+1,k)]+w[IX(i,j,k-1)]+w[IX(i,j,k+1)]))/c;
w[IX(i,j,k)] = (w0[IX(i,j,k)] + a*(w[IX(i-1,j,k)]+w[IX(i+1,j,k)]+w[IX(i,j-1,k)]+w[IX(i,j+1,k)]+w[IX(i,j,k-1)]+w[IX(i,j,k+1)]))/c; }
} }
} }
} }
//set bounds
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,u);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_V,v);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_W,w);
} }
} }

View File

@ -53,7 +53,7 @@ int fluid_sim_grid2_velocity_diffuse_test1(){
fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0); fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0);
//diffuse density //diffuse density
//solve vector diffusion //solve vector diffusion
for(int l = 0; l < FLUID_GRID2_VECTOR_DIFFUSE_TIMES; l++){ for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
//solve vector diffusion //solve vector diffusion
fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,FLUID_GRID2_SIM_STEP); fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,FLUID_GRID2_SIM_STEP);
//update array for vectors //update array for vectors
@ -113,7 +113,7 @@ int fluid_sim_grid2_velocity_diffuse_test2(){
fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0); fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0);
//diffuse density //diffuse density
//solve vector diffusion //solve vector diffusion
for(int l = 0; l < FLUID_GRID2_VECTOR_DIFFUSE_TIMES; l++){ for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
//solve vector diffusion //solve vector diffusion
fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,FLUID_GRID2_SIM_STEP); fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,FLUID_GRID2_SIM_STEP);
//update array for vectors //update array for vectors