move iterator inside diffuse funcs
This commit is contained in:
parent
58a9bafa4d
commit
8aab5319b5
@ -7,11 +7,6 @@
|
|||||||
*/
|
*/
|
||||||
#define FLUID_GRID2_LINEARSOLVERTIMES 2
|
#define FLUID_GRID2_LINEARSOLVERTIMES 2
|
||||||
|
|
||||||
/**
|
|
||||||
* The number of times to relax the vector diffusion solver
|
|
||||||
*/
|
|
||||||
#define FLUID_GRID2_VECTOR_DIFFUSE_TIMES 2
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Width of a single grid cell
|
* Width of a single grid cell
|
||||||
*/
|
*/
|
||||||
|
|||||||
@ -52,30 +52,35 @@ LIBRARY_API void fluid_grid2_solveDiffuseDensity(
|
|||||||
__m256 aScalar = _mm256_set1_ps(a);
|
__m256 aScalar = _mm256_set1_ps(a);
|
||||||
__m256 cScalar = _mm256_set1_ps(c);
|
__m256 cScalar = _mm256_set1_ps(c);
|
||||||
|
|
||||||
//transform u direction
|
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
|
||||||
for(k=1; k<DIM-1; k++){
|
//iterate
|
||||||
for(j=1; j<DIM-1; j++){
|
for(k=1; k<DIM-1; k++){
|
||||||
int n = 0;
|
for(j=1; j<DIM-1; j++){
|
||||||
//solve as much as possible vectorized
|
int n = 0;
|
||||||
for(i = 1; i < DIM-1; i=i+8){
|
//solve as much as possible vectorized
|
||||||
__m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
|
for(i = 1; i < DIM-1; i=i+8){
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
|
__m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j-1,k)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j+1,k)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j-1,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k-1)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j+1,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k+1)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k-1)]));
|
||||||
vector = _mm256_mul_ps(vector,aScalar);
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k+1)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[IX(i,j,k)]));
|
vector = _mm256_mul_ps(vector,aScalar);
|
||||||
vector = _mm256_div_ps(vector,cScalar);
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[IX(i,j,k)]));
|
||||||
_mm256_storeu_ps(&x[IX(i,j,k)],vector);
|
vector = _mm256_div_ps(vector,cScalar);
|
||||||
}
|
_mm256_storeu_ps(&x[IX(i,j,k)],vector);
|
||||||
//If there is any leftover, perform manual solving
|
}
|
||||||
if(i>DIM-1){
|
//If there is any leftover, perform manual solving
|
||||||
for(i=i-8; i < DIM-1; i++){
|
if(i>DIM-1){
|
||||||
x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
|
for(i=i-8; i < DIM-1; i++){
|
||||||
|
x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//set bounds
|
||||||
|
fluid_grid2_set_bounds(0,x);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -81,17 +81,7 @@ LIBRARY_API void fluid_grid2_simulate(
|
|||||||
fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0);
|
fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0);
|
||||||
|
|
||||||
//solve vector diffusion
|
//solve vector diffusion
|
||||||
for(int l = 0; l < FLUID_GRID2_VECTOR_DIFFUSE_TIMES; l++){
|
fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,timestep);
|
||||||
//solve vector diffusion
|
|
||||||
fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,timestep);
|
|
||||||
//update array for vectors
|
|
||||||
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,currentChunk->u[CENTER_LOC]);
|
|
||||||
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_V,currentChunk->v[CENTER_LOC]);
|
|
||||||
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_W,currentChunk->w[CENTER_LOC]);
|
|
||||||
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,currentChunk->u0[CENTER_LOC]);
|
|
||||||
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_V,currentChunk->v0[CENTER_LOC]);
|
|
||||||
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_W,currentChunk->w0[CENTER_LOC]);
|
|
||||||
}
|
|
||||||
|
|
||||||
//setup projection
|
//setup projection
|
||||||
fluid_grid2_setupProjection(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,timestep);
|
fluid_grid2_setupProjection(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,timestep);
|
||||||
@ -182,10 +172,8 @@ LIBRARY_API void fluid_grid2_simulate(
|
|||||||
//swap vector fields
|
//swap vector fields
|
||||||
fluid_grid2_flip_arrays(currentChunk->d,currentChunk->d0);
|
fluid_grid2_flip_arrays(currentChunk->d,currentChunk->d0);
|
||||||
//diffuse density
|
//diffuse density
|
||||||
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
|
fluid_grid2_solveDiffuseDensity(currentChunk->d,currentChunk->d0,timestep);
|
||||||
fluid_grid2_solveDiffuseDensity(currentChunk->d,currentChunk->d0,timestep);
|
fluid_grid2_set_bounds(0,currentChunk->d[CENTER_LOC]);
|
||||||
fluid_grid2_set_bounds(0,currentChunk->d[CENTER_LOC]);
|
|
||||||
}
|
|
||||||
//swap all density arrays
|
//swap all density arrays
|
||||||
//swap vector fields
|
//swap vector fields
|
||||||
fluid_grid2_flip_arrays(currentChunk->d,currentChunk->d0);
|
fluid_grid2_flip_arrays(currentChunk->d,currentChunk->d0);
|
||||||
@ -242,6 +230,22 @@ LIBRARY_API void fluid_grid2_simulate(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Saves a step of the simulation to a file
|
||||||
|
*/
|
||||||
static inline void fluid_grid2_saveStep(float * values, const char * name){
|
static inline void fluid_grid2_saveStep(float * values, const char * name){
|
||||||
if(SAVE_STEPS){
|
if(SAVE_STEPS){
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
@ -346,8 +350,19 @@ static inline void fluid_grid2_apply_bounds_mask(float * realArr, float * bounds
|
|||||||
_mm256_storeu_ps(&realArr[IX(x,y,z)],finalVec);
|
_mm256_storeu_ps(&realArr[IX(x,y,z)],finalVec);
|
||||||
|
|
||||||
|
|
||||||
|
//middle part
|
||||||
|
x = 8;
|
||||||
|
//border part
|
||||||
|
maskedBounds = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
|
||||||
|
//real part
|
||||||
|
realVal = _mm256_loadu_ps(&realArr[IX(x,y,z)]);
|
||||||
|
invertedMask = _mm256_loadu_ps(&fluid_grid2_border_mask_inverted[IX(x,y,z)]);
|
||||||
|
realPart = _mm256_mul_ps(realVal,invertedMask);
|
||||||
|
finalVec = _mm256_add_ps(realPart,maskedBounds);
|
||||||
|
_mm256_storeu_ps(&realArr[IX(x,y,z)],finalVec);
|
||||||
|
|
||||||
//upper part
|
//upper part
|
||||||
x = 2;
|
x = 10;
|
||||||
//border part
|
//border part
|
||||||
maskedBounds = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
|
maskedBounds = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
|
||||||
//real part
|
//real part
|
||||||
@ -382,19 +397,29 @@ static inline void fluid_grid2_apply_neighbors(Chunk * chunk){
|
|||||||
*/
|
*/
|
||||||
static inline void fluid_grid2_populate_masked_arr(float * sourceArr, float * workingArr){
|
static inline void fluid_grid2_populate_masked_arr(float * sourceArr, float * workingArr){
|
||||||
__m256 arrVal, maskVal, masked;
|
__m256 arrVal, maskVal, masked;
|
||||||
|
int x;
|
||||||
for(int z = 0; z < 18; z++){
|
for(int z = 0; z < 18; z++){
|
||||||
for(int y = 0; y < 18; y++){
|
for(int y = 0; y < 18; y++){
|
||||||
//lower part
|
//lower part
|
||||||
arrVal = _mm256_loadu_ps(&sourceArr[IX(0,y,z)]);
|
x = 0;
|
||||||
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(0,y,z)]);
|
arrVal = _mm256_loadu_ps(&sourceArr[IX(x,y,z)]);
|
||||||
|
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
|
||||||
masked = _mm256_mul_ps(arrVal,maskVal);
|
masked = _mm256_mul_ps(arrVal,maskVal);
|
||||||
_mm256_storeu_ps(&workingArr[IX(0,y,z)],masked);
|
_mm256_storeu_ps(&workingArr[IX(x,y,z)],masked);
|
||||||
|
|
||||||
|
//middle part
|
||||||
|
x = 8;
|
||||||
|
arrVal = _mm256_loadu_ps(&sourceArr[IX(x,y,z)]);
|
||||||
|
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
|
||||||
|
masked = _mm256_mul_ps(arrVal,maskVal);
|
||||||
|
_mm256_storeu_ps(&workingArr[IX(x,y,z)],masked);
|
||||||
|
|
||||||
//upper part
|
//upper part
|
||||||
arrVal = _mm256_loadu_ps(&sourceArr[IX(2,y,z)]);
|
x = 10;
|
||||||
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(0,y,z)]);
|
arrVal = _mm256_loadu_ps(&sourceArr[IX(x,y,z)]);
|
||||||
|
maskVal = _mm256_loadu_ps(&fluid_grid2_border_mask[IX(x,y,z)]);
|
||||||
masked = _mm256_mul_ps(arrVal,maskVal);
|
masked = _mm256_mul_ps(arrVal,maskVal);
|
||||||
_mm256_storeu_ps(&workingArr[IX(0,y,z)],masked);
|
_mm256_storeu_ps(&workingArr[IX(x,y,z)],masked);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -48,8 +48,8 @@ LIBRARY_API void fluid_grid2_solveVectorDiffuse (
|
|||||||
float ** jrw0,
|
float ** jrw0,
|
||||||
float dt
|
float dt
|
||||||
){
|
){
|
||||||
float a=dt*FLUID_GRID2_VISCOSITY_CONSTANT/(FLUID_GRID2_H*FLUID_GRID2_H);
|
float a = dt*FLUID_GRID2_VISCOSITY_CONSTANT/(FLUID_GRID2_H*FLUID_GRID2_H);
|
||||||
float c=1+6*a;
|
float c = 1+6*a;
|
||||||
int i, j, k, l, m;
|
int i, j, k, l, m;
|
||||||
float * u = GET_ARR_RAW(jru,CENTER_LOC);
|
float * u = GET_ARR_RAW(jru,CENTER_LOC);
|
||||||
float * v = GET_ARR_RAW(jrv,CENTER_LOC);
|
float * v = GET_ARR_RAW(jrv,CENTER_LOC);
|
||||||
@ -61,82 +61,86 @@ LIBRARY_API void fluid_grid2_solveVectorDiffuse (
|
|||||||
__m256 aScalar = _mm256_set1_ps(a);
|
__m256 aScalar = _mm256_set1_ps(a);
|
||||||
__m256 cScalar = _mm256_set1_ps(c);
|
__m256 cScalar = _mm256_set1_ps(c);
|
||||||
|
|
||||||
//transform u direction
|
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
|
||||||
for(k=1; k<DIM-1; k++){
|
//transform u direction
|
||||||
for(j=1; j<DIM-1; j++){
|
for(k=1; k<DIM-1; k++){
|
||||||
int n = 0;
|
for(j=1; j<DIM-1; j++){
|
||||||
//solve as much as possible vectorized
|
//solve as much as possible vectorized
|
||||||
for(i = 1; i < DIM-1; i=i+8){
|
for(i = 1; i < DIM-1; i=i+8){
|
||||||
__m256 vector = _mm256_loadu_ps(&u[IX(i-1,j,k)]);
|
__m256 vector = _mm256_loadu_ps(&u[IX(i-1,j,k)]);
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i+1,j,k)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i+1,j,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j-1,k)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j-1,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j+1,k)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j+1,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k-1)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k-1)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k+1)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k+1)]));
|
||||||
vector = _mm256_mul_ps(vector,aScalar);
|
vector = _mm256_mul_ps(vector,aScalar);
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u0[IX(i,j,k)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u0[IX(i,j,k)]));
|
||||||
vector = _mm256_div_ps(vector,cScalar);
|
vector = _mm256_div_ps(vector,cScalar);
|
||||||
_mm256_storeu_ps(&u[IX(i,j,k)],vector);
|
_mm256_storeu_ps(&u[IX(i,j,k)],vector);
|
||||||
}
|
}
|
||||||
//If there is any leftover, perform manual solving
|
//If there is any leftover, perform manual solving
|
||||||
if(i>DIM-1){
|
if(i>DIM-1){
|
||||||
for(i=i-8; i < DIM-1; i++){
|
for(i=i-8; i < DIM-1; i++){
|
||||||
u[IX(i,j,k)] = (u0[IX(i,j,k)] + a*(u[IX(i-1,j,k)]+u[IX(i+1,j,k)]+u[IX(i,j-1,k)]+u[IX(i,j+1,k)]+u[IX(i,j,k-1)]+u[IX(i,j,k+1)]))/c;
|
u[IX(i,j,k)] = (u0[IX(i,j,k)] + a*(u[IX(i-1,j,k)]+u[IX(i+1,j,k)]+u[IX(i,j-1,k)]+u[IX(i,j+1,k)]+u[IX(i,j,k-1)]+u[IX(i,j,k+1)]))/c;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
//transform v direction
|
//transform v direction
|
||||||
for(k=1; k<DIM-1; k++){
|
for(k=1; k<DIM-1; k++){
|
||||||
for(j=1; j<DIM-1; j++){
|
for(j=1; j<DIM-1; j++){
|
||||||
int n = 0;
|
//solve as much as possible vectorized
|
||||||
//solve as much as possible vectorized
|
for(i = 1; i < DIM-1; i=i+8){
|
||||||
for(i = 1; i < DIM-1; i=i+8){
|
__m256 vector = _mm256_loadu_ps(&v[IX(i-1,j,k)]);
|
||||||
__m256 vector = _mm256_loadu_ps(&v[IX(i-1,j,k)]);
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i+1,j,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i+1,j,k)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j+1,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j+1,k)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k-1)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k-1)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k+1)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k+1)]));
|
vector = _mm256_mul_ps(vector,aScalar);
|
||||||
vector = _mm256_mul_ps(vector,aScalar);
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v0[IX(i,j,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v0[IX(i,j,k)]));
|
vector = _mm256_div_ps(vector,cScalar);
|
||||||
vector = _mm256_div_ps(vector,cScalar);
|
_mm256_storeu_ps(&v[IX(i,j,k)],vector);
|
||||||
_mm256_storeu_ps(&v[IX(i,j,k)],vector);
|
}
|
||||||
}
|
//If there is any leftover, perform manual solving
|
||||||
//If there is any leftover, perform manual solving
|
if(i>DIM-1){
|
||||||
if(i>DIM-1){
|
for(i=i-8; i < DIM-1; i++){
|
||||||
for(i=i-8; i < DIM-1; i++){
|
v[IX(i,j,k)] = (v0[IX(i,j,k)] + a*(v[IX(i-1,j,k)]+v[IX(i+1,j,k)]+v[IX(i,j-1,k)]+v[IX(i,j+1,k)]+v[IX(i,j,k-1)]+v[IX(i,j,k+1)]))/c;
|
||||||
v[IX(i,j,k)] = (v0[IX(i,j,k)] + a*(v[IX(i-1,j,k)]+v[IX(i+1,j,k)]+v[IX(i,j-1,k)]+v[IX(i,j+1,k)]+v[IX(i,j,k-1)]+v[IX(i,j,k+1)]))/c;
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
//transform w direction
|
//transform w direction
|
||||||
for(k=1; k<DIM-1; k++){
|
for(k=1; k<DIM-1; k++){
|
||||||
for(j=1; j<DIM-1; j++){
|
for(j=1; j<DIM-1; j++){
|
||||||
int n = 0;
|
//solve as much as possible vectorized
|
||||||
//solve as much as possible vectorized
|
for(i = 1; i < DIM-1; i=i+8){
|
||||||
for(i = 1; i < DIM-1; i=i+8){
|
__m256 vector = _mm256_loadu_ps(&w[IX(i-1,j,k)]);
|
||||||
__m256 vector = _mm256_loadu_ps(&w[IX(i-1,j,k)]);
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i+1,j,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i+1,j,k)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j-1,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j-1,k)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j+1,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j+1,k)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k+1)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k+1)]));
|
vector = _mm256_mul_ps(vector,aScalar);
|
||||||
vector = _mm256_mul_ps(vector,aScalar);
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w0[IX(i,j,k)]));
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w0[IX(i,j,k)]));
|
vector = _mm256_div_ps(vector,cScalar);
|
||||||
vector = _mm256_div_ps(vector,cScalar);
|
_mm256_storeu_ps(&w[IX(i,j,k)],vector);
|
||||||
_mm256_storeu_ps(&w[IX(i,j,k)],vector);
|
}
|
||||||
}
|
//If there is any leftover, perform manual solving
|
||||||
//If there is any leftover, perform manual solving
|
if(i>DIM-1){
|
||||||
if(i>DIM-1){
|
for(i=i-8; i < DIM-1; i++){
|
||||||
for(i=i-8; i < DIM-1; i++){
|
w[IX(i,j,k)] = (w0[IX(i,j,k)] + a*(w[IX(i-1,j,k)]+w[IX(i+1,j,k)]+w[IX(i,j-1,k)]+w[IX(i,j+1,k)]+w[IX(i,j,k-1)]+w[IX(i,j,k+1)]))/c;
|
||||||
w[IX(i,j,k)] = (w0[IX(i,j,k)] + a*(w[IX(i-1,j,k)]+w[IX(i+1,j,k)]+w[IX(i,j-1,k)]+w[IX(i,j+1,k)]+w[IX(i,j,k-1)]+w[IX(i,j,k+1)]))/c;
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//set bounds
|
||||||
|
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,u);
|
||||||
|
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_V,v);
|
||||||
|
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_W,w);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -53,7 +53,7 @@ int fluid_sim_grid2_velocity_diffuse_test1(){
|
|||||||
fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0);
|
fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0);
|
||||||
//diffuse density
|
//diffuse density
|
||||||
//solve vector diffusion
|
//solve vector diffusion
|
||||||
for(int l = 0; l < FLUID_GRID2_VECTOR_DIFFUSE_TIMES; l++){
|
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
|
||||||
//solve vector diffusion
|
//solve vector diffusion
|
||||||
fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,FLUID_GRID2_SIM_STEP);
|
fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,FLUID_GRID2_SIM_STEP);
|
||||||
//update array for vectors
|
//update array for vectors
|
||||||
@ -113,7 +113,7 @@ int fluid_sim_grid2_velocity_diffuse_test2(){
|
|||||||
fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0);
|
fluid_grid2_flip_arrays(currentChunk->w,currentChunk->w0);
|
||||||
//diffuse density
|
//diffuse density
|
||||||
//solve vector diffusion
|
//solve vector diffusion
|
||||||
for(int l = 0; l < FLUID_GRID2_VECTOR_DIFFUSE_TIMES; l++){
|
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
|
||||||
//solve vector diffusion
|
//solve vector diffusion
|
||||||
fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,FLUID_GRID2_SIM_STEP);
|
fluid_grid2_solveVectorDiffuse(currentChunk->u,currentChunk->v,currentChunk->w,currentChunk->u0,currentChunk->v0,currentChunk->w0,FLUID_GRID2_SIM_STEP);
|
||||||
//update array for vectors
|
//update array for vectors
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user