move relaxation loop inside diffuse funcs

austin 2024-12-11 11:44:45 -05:00
parent 58a9bafa4d
commit 8aab5319b5
5 changed files with 146 additions and 117 deletions
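
In short, the change moves the relaxation loop that fluid_grid2_simulate used to drive into fluid_grid2_solveDiffuseDensity and fluid_grid2_solveVectorDiffuse, which now also apply their own boundary handling. Below is a minimal, self-contained C sketch of that calling-pattern change on a 1D toy grid; the names solve_diffuse, set_bounds, N and ITERATIONS are hypothetical stand-ins rather than the project's API, and the toy solver omits the AVX vectorization used in the real code.

#include <stdio.h>

#define N 16            /* hypothetical toy grid size */
#define ITERATIONS 2    /* stands in for FLUID_GRID2_LINEARSOLVERTIMES */

/* hypothetical boundary handler, standing in for fluid_grid2_set_bounds */
static void set_bounds(float *x){
    x[0]   = x[1];
    x[N-1] = x[N-2];
}

/* Before this commit the caller looked roughly like:
 *     for(int l = 0; l < ITERATIONS; l++){
 *         solve_diffuse_pass(x, x0, a);
 *         set_bounds(x);
 *     }
 * After it, the solver owns the loop and the bounds call, and the caller
 * makes a single call to solve_diffuse(). */
static void solve_diffuse(float *x, const float *x0, float a){
    float c = 1.0f + 2.0f*a;
    for(int l = 0; l < ITERATIONS; l++){
        /* one relaxation pass over the interior cells */
        for(int i = 1; i < N-1; i++){
            x[i] = (x0[i] + a*(x[i-1] + x[i+1])) / c;
        }
        /* boundary handling now happens inside the solver */
        set_bounds(x);
    }
}

int main(void){
    float x[N] = {0}, x0[N] = {0};
    x0[N/2] = 1.0f;                      /* a single density spike to diffuse */
    solve_diffuse(x, x0, 0.25f);
    for(int i = 0; i < N; i++) printf("%.4f ", x[i]);
    printf("\n");
    return 0;
}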

View File

@@ -7,11 +7,6 @@
*/
#define FLUID_GRID2_LINEARSOLVERTIMES 2
/**
* The number of times to relax the vector diffusion solver
*/
#define FLUID_GRID2_VECTOR_DIFFUSE_TIMES 2
/**
* Width of a single grid cell
*/

View File

@@ -52,30 +52,35 @@ LIBRARY_API void fluid_grid2_solveDiffuseDensity(
__m256 aScalar = _mm256_set1_ps(a);
__m256 cScalar = _mm256_set1_ps(c);
//transform u direction
for(k=1; k<DIM-1; k++){
for(j=1; j<DIM-1; j++){
int n = 0;
//solve as much as possible vectorized
for(i = 1; i < DIM-1; i=i+8){
__m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j-1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j+1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k-1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k+1)]));
vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[IX(i,j,k)]));
vector = _mm256_div_ps(vector,cScalar);
_mm256_storeu_ps(&x[IX(i,j,k)],vector);
}
//If there is any leftover, perform manual solving
if(i>DIM-1){
for(i=i-8; i < DIM-1; i++){
x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
//iterate
for(k=1; k<DIM-1; k++){
for(j=1; j<DIM-1; j++){
int n = 0;
//solve as much as possible vectorized
for(i = 1; i < DIM-1; i=i+8){
__m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j-1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j+1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k-1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k+1)]));
vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[IX(i,j,k)]));
vector = _mm256_div_ps(vector,cScalar);
_mm256_storeu_ps(&x[IX(i,j,k)],vector);
}
//If there is any leftover, perform manual solving
if(i>DIM-1){
for(i=i-8; i < DIM-1; i++){
x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
}
}
}
}
//set bounds
fluid_grid2_set_bounds(0,x);
}
}
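
Within each pass the sweep follows a common AVX shape: process eight cells per iteration with unaligned 256-bit loads and stores, then fall back to scalar code for whatever is left of the row (here the last partial block is re-solved scalar starting from i-8). Below is a minimal, self-contained sketch of that main-loop-plus-scalar-tail pattern on a flat array; the names scale_add_row and ROW are hypothetical, the operation is a simple scale-and-add rather than the diffusion stencil, and the tail is written as a non-overlapping remainder loop. Building it requires AVX support (e.g. gcc -mavx).

#include <immintrin.h>
#include <stdio.h>

#define ROW 20   /* hypothetical row length that is not a multiple of 8 */

/* Scale a row by a and add b: eight floats at a time with AVX, then a
 * scalar loop for the remaining elements. */
static void scale_add_row(float *x, float a, float b){
    __m256 aVec = _mm256_set1_ps(a);
    __m256 bVec = _mm256_set1_ps(b);
    int i;
    /* vectorized main loop */
    for(i = 0; i + 8 <= ROW; i += 8){
        __m256 v = _mm256_loadu_ps(&x[i]);
        v = _mm256_mul_ps(v, aVec);
        v = _mm256_add_ps(v, bVec);
        _mm256_storeu_ps(&x[i], v);
    }
    /* scalar tail for the leftover 0..7 elements */
    for(; i < ROW; i++){
        x[i] = x[i]*a + b;
    }
}

int main(void){
    float row[ROW];
    for(int i = 0; i < ROW; i++) row[i] = (float)i;
    scale_add_row(row, 2.0f, 1.0f);
    for(int i = 0; i < ROW; i++) printf("%.1f ", row[i]);
    printf("\n");
    return 0;
}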

View File

@@ -81,17 +81,7 @@ LIBRARY_API void fluid_grid2_simulate(
fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0);
//solve vector diffusion
for(int l = 0; l < FLUID_GRID2_VECTOR_DIFFUSE_TIMES; l++){
//solve vector diffusion
fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,timestep);
//update array for vectors
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,currentChunk->u[CENTER_LOC]);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_V,currentChunk->v[CENTER_LOC]);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_W,currentChunk->w[CENTER_LOC]);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,currentChunk->u0[CENTER_LOC]);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_V,currentChunk->v0[CENTER_LOC]);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_W,currentChunk->w0[CENTER_LOC]);
}
fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,timestep);
//setup projection
fluid_grid2_setupProjection(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,timestep);
@@ -182,10 +172,8 @@ LIBRARY_API void fluid_grid2_simulate(
//swap vector fields
fluid_grid2_flip_arrays(currentChunk->d,currentChunk->d0);
//diffuse density
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
fluid_grid2_solveDiffuseDensity(currentChunk->d,currentChunk->d0,timestep);
fluid_grid2_set_bounds(0,currentChunk->d[CENTER_LOC]);
}
fluid_grid2_solveDiffuseDensity(currentChunk->d,currentChunk->d0,timestep);
fluid_grid2_set_bounds(0,currentChunk->d[CENTER_LOC]);
//swap all density arrays
//swap vector fields
fluid_grid2_flip_arrays(currentChunk->d,currentChunk->d0);
@@ -242,6 +230,22 @@ LIBRARY_API void fluid_grid2_simulate(
}
/**
* Saves a step of the simulation to a file
*/
static inline void fluid_grid2_saveStep(float * values, const char * name){
if(SAVE_STEPS){
FILE *fp;
@@ -346,8 +350,19 @@ static inline void fluid_grid2_apply_bounds_mask(float * realArr, float * bounds
_mm256_storeu_ps(&realArr[IX(x,y,z)],finalVec);
//middle part
x = 8;
//border part
maskedBounds = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
//real part
realVal = _mm256_loadu_ps(&realArr[IX(x,y,z)]);
invertedMask = _mm256_loadu_ps(&fluid_grid2_border_mask_inverted[IX(x,y,z)]);
realPart = _mm256_mul_ps(realVal,invertedMask);
finalVec = _mm256_add_ps(realPart,maskedBounds);
_mm256_storeu_ps(&realArr[IX(x,y,z)],finalVec);
//upper part
x = 2;
x = 10;
//border part
maskedBounds = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
//real part
@@ -382,19 +397,29 @@ static inline void fluid_grid2_apply_neighbors(Chunk * chunk){
*/
static inline void fluid_grid2_populate_masked_arr(float * sourceArr, float * workingArr){
__m256 arrVal, maskVal, masked;
int x;
for(int z = 0; z < 18; z++){
for(int y = 0; y < 18; y++){
//lower part
arrVal = _mm256_loadu_ps(&sourceArr[IX(0,y,z)]);
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(0,y,z)]);
x = 0;
arrVal = _mm256_loadu_ps(&sourceArr[IX(x,y,z)]);
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
masked = _mm256_mul_ps(arrVal,maskVal);
_mm256_storeu_ps(&workingArr[IX(0,y,z)],masked);
_mm256_storeu_ps(&workingArr[IX(x,y,z)],masked);
//middle part
x = 8;
arrVal = _mm256_loadu_ps(&sourceArr[IX(x,y,z)]);
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
masked = _mm256_mul_ps(arrVal,maskVal);
_mm256_storeu_ps(&workingArr[IX(x,y,z)],masked);
//upper part
arrVal = _mm256_loadu_ps(&sourceArr[IX(2,y,z)]);
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(0,y,z)]);
x = 10;
arrVal = _mm256_loadu_ps(&sourceArr[IX(x,y,z)]);
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
masked = _mm256_mul_ps(arrVal,maskVal);
_mm256_storeu_ps(&workingArr[IX(0,y,z)],masked);
_mm256_storeu_ps(&workingArr[IX(x,y,z)],masked);
}
}
}

View File

@@ -48,8 +48,8 @@ LIBRARY_API void fluid_grid2_solveVectorDiffuse (
float ** jrw0,
float dt
){
float a=dt*FLUID_GRID2_VISCOSITY_CONSTANT/(FLUID_GRID2_H*FLUID_GRID2_H);
float c=1+6*a;
float a = dt*FLUID_GRID2_VISCOSITY_CONSTANT/(FLUID_GRID2_H*FLUID_GRID2_H);
float c = 1+6*a;
int i, j, k, l, m;
float * u = GET_ARR_RAW(jru,CENTER_LOC);
float * v = GET_ARR_RAW(jrv,CENTER_LOC);
@@ -61,82 +61,86 @@ LIBRARY_API void fluid_grid2_solveVectorDiffuse (
__m256 aScalar = _mm256_set1_ps(a);
__m256 cScalar = _mm256_set1_ps(c);
//transform u direction
for(k=1; k<DIM-1; k++){
for(j=1; j<DIM-1; j++){
int n = 0;
//solve as much as possible vectorized
for(i = 1; i < DIM-1; i=i+8){
__m256 vector = _mm256_loadu_ps(&u[IX(i-1,j,k)]);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i+1,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j-1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j+1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k-1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k+1)]));
vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u0[IX(i,j,k)]));
vector = _mm256_div_ps(vector,cScalar);
_mm256_storeu_ps(&u[IX(i,j,k)],vector);
}
//If there is any leftover, perform manual solving
if(i>DIM-1){
for(i=i-8; i < DIM-1; i++){
u[IX(i,j,k)] = (u0[IX(i,j,k)] + a*(u[IX(i-1,j,k)]+u[IX(i+1,j,k)]+u[IX(i,j-1,k)]+u[IX(i,j+1,k)]+u[IX(i,j,k-1)]+u[IX(i,j,k+1)]))/c;
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
//transform u direction
for(k=1; k<DIM-1; k++){
for(j=1; j<DIM-1; j++){
//solve as much as possible vectorized
for(i = 1; i < DIM-1; i=i+8){
__m256 vector = _mm256_loadu_ps(&u[IX(i-1,j,k)]);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i+1,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j-1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j+1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k-1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k+1)]));
vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u0[IX(i,j,k)]));
vector = _mm256_div_ps(vector,cScalar);
_mm256_storeu_ps(&u[IX(i,j,k)],vector);
}
//If there is any leftover, perform manual solving
if(i>DIM-1){
for(i=i-8; i < DIM-1; i++){
u[IX(i,j,k)] = (u0[IX(i,j,k)] + a*(u[IX(i-1,j,k)]+u[IX(i+1,j,k)]+u[IX(i,j-1,k)]+u[IX(i,j+1,k)]+u[IX(i,j,k-1)]+u[IX(i,j,k+1)]))/c;
}
}
}
}
}
//transform v direction
for(k=1; k<DIM-1; k++){
for(j=1; j<DIM-1; j++){
int n = 0;
//solve as much as possible vectorized
for(i = 1; i < DIM-1; i=i+8){
__m256 vector = _mm256_loadu_ps(&v[IX(i-1,j,k)]);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i+1,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j+1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k-1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k+1)]));
vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v0[IX(i,j,k)]));
vector = _mm256_div_ps(vector,cScalar);
_mm256_storeu_ps(&v[IX(i,j,k)],vector);
}
//If there is any leftover, perform manual solving
if(i>DIM-1){
for(i=i-8; i < DIM-1; i++){
v[IX(i,j,k)] = (v0[IX(i,j,k)] + a*(v[IX(i-1,j,k)]+v[IX(i+1,j,k)]+v[IX(i,j-1,k)]+v[IX(i,j+1,k)]+v[IX(i,j,k-1)]+v[IX(i,j,k+1)]))/c;
//transform v direction
for(k=1; k<DIM-1; k++){
for(j=1; j<DIM-1; j++){
//solve as much as possible vectorized
for(i = 1; i < DIM-1; i=i+8){
__m256 vector = _mm256_loadu_ps(&v[IX(i-1,j,k)]);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i+1,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j+1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k-1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k+1)]));
vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v0[IX(i,j,k)]));
vector = _mm256_div_ps(vector,cScalar);
_mm256_storeu_ps(&v[IX(i,j,k)],vector);
}
//If there is any leftover, perform manual solving
if(i>DIM-1){
for(i=i-8; i < DIM-1; i++){
v[IX(i,j,k)] = (v0[IX(i,j,k)] + a*(v[IX(i-1,j,k)]+v[IX(i+1,j,k)]+v[IX(i,j-1,k)]+v[IX(i,j+1,k)]+v[IX(i,j,k-1)]+v[IX(i,j,k+1)]))/c;
}
}
}
}
}
//transform w direction
for(k=1; k<DIM-1; k++){
for(j=1; j<DIM-1; j++){
int n = 0;
//solve as much as possible vectorized
for(i = 1; i < DIM-1; i=i+8){
__m256 vector = _mm256_loadu_ps(&w[IX(i-1,j,k)]);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i+1,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j-1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j+1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k+1)]));
vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w0[IX(i,j,k)]));
vector = _mm256_div_ps(vector,cScalar);
_mm256_storeu_ps(&w[IX(i,j,k)],vector);
}
//If there is any leftover, perform manual solving
if(i>DIM-1){
for(i=i-8; i < DIM-1; i++){
w[IX(i,j,k)] = (w0[IX(i,j,k)] + a*(w[IX(i-1,j,k)]+w[IX(i+1,j,k)]+w[IX(i,j-1,k)]+w[IX(i,j+1,k)]+w[IX(i,j,k-1)]+w[IX(i,j,k+1)]))/c;
//transform w direction
for(k=1; k<DIM-1; k++){
for(j=1; j<DIM-1; j++){
//solve as much as possible vectorized
for(i = 1; i < DIM-1; i=i+8){
__m256 vector = _mm256_loadu_ps(&w[IX(i-1,j,k)]);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i+1,j,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j-1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j+1,k)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k+1)]));
vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w0[IX(i,j,k)]));
vector = _mm256_div_ps(vector,cScalar);
_mm256_storeu_ps(&w[IX(i,j,k)],vector);
}
//If there is any leftover, perform manual solving
if(i>DIM-1){
for(i=i-8; i < DIM-1; i++){
w[IX(i,j,k)] = (w0[IX(i,j,k)] + a*(w[IX(i-1,j,k)]+w[IX(i+1,j,k)]+w[IX(i,j-1,k)]+w[IX(i,j+1,k)]+w[IX(i,j,k-1)]+w[IX(i,j,k+1)]))/c;
}
}
}
}
//set bounds
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,u);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_V,v);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_W,w);
}
}

View File

@@ -53,7 +53,7 @@ int fluid_sim_grid2_velocity_diffuse_test1(){
fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0);
//diffuse density
//solve vector diffusion
for(int l = 0; l < FLUID_GRID2_VECTOR_DIFFUSE_TIMES; l++){
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
//solve vector diffusion
fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,FLUID_GRID2_SIM_STEP);
//update array for vectors
@@ -113,7 +113,7 @@ int fluid_sim_grid2_velocity_diffuse_test2(){
fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0);
//diffuse density
//solve vector diffusion
for(int l = 0; l < FLUID_GRID2_VECTOR_DIFFUSE_TIMES; l++){
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
//solve vector diffusion
fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,FLUID_GRID2_SIM_STEP);
//update array for vectors