swap diffuse to use generic solver func
Some checks failed
studiorailgun/Renderer/pipeline/head There was a failure building this commit
Some checks failed
studiorailgun/Renderer/pipeline/head There was a failure building this commit
This commit is contained in:
parent
8aab5319b5
commit
5f565c457d
@ -9,6 +9,7 @@
|
||||
#include "fluid/sim/grid2/solver_consts.h"
|
||||
#include "fluid/sim/grid2/utilities.h"
|
||||
#include "math/ode/multigrid.h"
|
||||
#include "math/ode/gauss_seidel.h"
|
||||
|
||||
|
||||
/**
|
||||
@ -49,35 +50,9 @@ LIBRARY_API void fluid_grid2_solveDiffuseDensity(
|
||||
float * x = GET_ARR_RAW(d,CENTER_LOC);
|
||||
float * x0 = GET_ARR_RAW(d0,CENTER_LOC);
|
||||
|
||||
__m256 aScalar = _mm256_set1_ps(a);
|
||||
__m256 cScalar = _mm256_set1_ps(c);
|
||||
|
||||
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
|
||||
//iterate
|
||||
for(k=1; k<DIM-1; k++){
|
||||
for(j=1; j<DIM-1; j++){
|
||||
int n = 0;
|
||||
//solve as much as possible vectorized
|
||||
for(i = 1; i < DIM-1; i=i+8){
|
||||
__m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j-1,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j+1,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k-1)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k+1)]));
|
||||
vector = _mm256_mul_ps(vector,aScalar);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[IX(i,j,k)]));
|
||||
vector = _mm256_div_ps(vector,cScalar);
|
||||
_mm256_storeu_ps(&x[IX(i,j,k)],vector);
|
||||
}
|
||||
//If there is any leftover, perform manual solving
|
||||
if(i>DIM-1){
|
||||
for(i=i-8; i < DIM-1; i++){
|
||||
x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
solver_gauss_seidel_iterate_parallel(x,x0,a,c,DIM);
|
||||
|
||||
//set bounds
|
||||
fluid_grid2_set_bounds(0,x);
|
||||
|
||||
@ -58,84 +58,15 @@ LIBRARY_API void fluid_grid2_solveVectorDiffuse (
|
||||
float * v0 = GET_ARR_RAW(jrv0,CENTER_LOC);
|
||||
float * w0 = GET_ARR_RAW(jrw0,CENTER_LOC);
|
||||
|
||||
__m256 aScalar = _mm256_set1_ps(a);
|
||||
__m256 cScalar = _mm256_set1_ps(c);
|
||||
|
||||
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
|
||||
//transform u direction
|
||||
for(k=1; k<DIM-1; k++){
|
||||
for(j=1; j<DIM-1; j++){
|
||||
//solve as much as possible vectorized
|
||||
for(i = 1; i < DIM-1; i=i+8){
|
||||
__m256 vector = _mm256_loadu_ps(&u[IX(i-1,j,k)]);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i+1,j,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j-1,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j+1,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k-1)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k+1)]));
|
||||
vector = _mm256_mul_ps(vector,aScalar);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u0[IX(i,j,k)]));
|
||||
vector = _mm256_div_ps(vector,cScalar);
|
||||
_mm256_storeu_ps(&u[IX(i,j,k)],vector);
|
||||
}
|
||||
//If there is any leftover, perform manual solving
|
||||
if(i>DIM-1){
|
||||
for(i=i-8; i < DIM-1; i++){
|
||||
u[IX(i,j,k)] = (u0[IX(i,j,k)] + a*(u[IX(i-1,j,k)]+u[IX(i+1,j,k)]+u[IX(i,j-1,k)]+u[IX(i,j+1,k)]+u[IX(i,j,k-1)]+u[IX(i,j,k+1)]))/c;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
solver_gauss_seidel_iterate_parallel(u,u0,a,c,DIM);
|
||||
|
||||
//transform v direction
|
||||
for(k=1; k<DIM-1; k++){
|
||||
for(j=1; j<DIM-1; j++){
|
||||
//solve as much as possible vectorized
|
||||
for(i = 1; i < DIM-1; i=i+8){
|
||||
__m256 vector = _mm256_loadu_ps(&v[IX(i-1,j,k)]);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i+1,j,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j+1,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k-1)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k+1)]));
|
||||
vector = _mm256_mul_ps(vector,aScalar);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v0[IX(i,j,k)]));
|
||||
vector = _mm256_div_ps(vector,cScalar);
|
||||
_mm256_storeu_ps(&v[IX(i,j,k)],vector);
|
||||
}
|
||||
//If there is any leftover, perform manual solving
|
||||
if(i>DIM-1){
|
||||
for(i=i-8; i < DIM-1; i++){
|
||||
v[IX(i,j,k)] = (v0[IX(i,j,k)] + a*(v[IX(i-1,j,k)]+v[IX(i+1,j,k)]+v[IX(i,j-1,k)]+v[IX(i,j+1,k)]+v[IX(i,j,k-1)]+v[IX(i,j,k+1)]))/c;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
solver_gauss_seidel_iterate_parallel(v,v0,a,c,DIM);
|
||||
|
||||
//transform w direction
|
||||
for(k=1; k<DIM-1; k++){
|
||||
for(j=1; j<DIM-1; j++){
|
||||
//solve as much as possible vectorized
|
||||
for(i = 1; i < DIM-1; i=i+8){
|
||||
__m256 vector = _mm256_loadu_ps(&w[IX(i-1,j,k)]);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i+1,j,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j-1,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j+1,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k+1)]));
|
||||
vector = _mm256_mul_ps(vector,aScalar);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w0[IX(i,j,k)]));
|
||||
vector = _mm256_div_ps(vector,cScalar);
|
||||
_mm256_storeu_ps(&w[IX(i,j,k)],vector);
|
||||
}
|
||||
//If there is any leftover, perform manual solving
|
||||
if(i>DIM-1){
|
||||
for(i=i-8; i < DIM-1; i++){
|
||||
w[IX(i,j,k)] = (w0[IX(i,j,k)] + a*(w[IX(i-1,j,k)]+w[IX(i+1,j,k)]+w[IX(i,j-1,k)]+w[IX(i,j+1,k)]+w[IX(i,j,k-1)]+w[IX(i,j,k+1)]))/c;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
solver_gauss_seidel_iterate_parallel(w,w0,a,c,DIM);
|
||||
|
||||
//set bounds
|
||||
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,u);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user