Swap diffuse solvers to use the generic Gauss-Seidel solver function
Some checks failed
studiorailgun/Renderer/pipeline/head There was a failure building this commit
Some checks failed
studiorailgun/Renderer/pipeline/head There was a failure building this commit
This commit is contained in:
parent
8aab5319b5
commit
5f565c457d
@ -9,6 +9,7 @@
|
|||||||
#include "fluid/sim/grid2/solver_consts.h"
|
#include "fluid/sim/grid2/solver_consts.h"
|
||||||
#include "fluid/sim/grid2/utilities.h"
|
#include "fluid/sim/grid2/utilities.h"
|
||||||
#include "math/ode/multigrid.h"
|
#include "math/ode/multigrid.h"
|
||||||
|
#include "math/ode/gauss_seidel.h"
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -49,35 +50,9 @@ LIBRARY_API void fluid_grid2_solveDiffuseDensity(
|
|||||||
float * x = GET_ARR_RAW(d,CENTER_LOC);
|
float * x = GET_ARR_RAW(d,CENTER_LOC);
|
||||||
float * x0 = GET_ARR_RAW(d0,CENTER_LOC);
|
float * x0 = GET_ARR_RAW(d0,CENTER_LOC);
|
||||||
|
|
||||||
__m256 aScalar = _mm256_set1_ps(a);
|
|
||||||
__m256 cScalar = _mm256_set1_ps(c);
|
|
||||||
|
|
||||||
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
|
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
|
||||||
//iterate
|
//iterate
|
||||||
for(k=1; k<DIM-1; k++){
|
solver_gauss_seidel_iterate_parallel(x,x0,a,c,DIM);
|
||||||
for(j=1; j<DIM-1; j++){
|
|
||||||
int n = 0;
|
|
||||||
//solve as much as possible vectorized
|
|
||||||
for(i = 1; i < DIM-1; i=i+8){
|
|
||||||
__m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j-1,k)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j+1,k)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k-1)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k+1)]));
|
|
||||||
vector = _mm256_mul_ps(vector,aScalar);
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[IX(i,j,k)]));
|
|
||||||
vector = _mm256_div_ps(vector,cScalar);
|
|
||||||
_mm256_storeu_ps(&x[IX(i,j,k)],vector);
|
|
||||||
}
|
|
||||||
//If there is any leftover, perform manual solving
|
|
||||||
if(i>DIM-1){
|
|
||||||
for(i=i-8; i < DIM-1; i++){
|
|
||||||
x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//set bounds
|
//set bounds
|
||||||
fluid_grid2_set_bounds(0,x);
|
fluid_grid2_set_bounds(0,x);
|
||||||
|
|||||||
@ -58,84 +58,15 @@ LIBRARY_API void fluid_grid2_solveVectorDiffuse (
|
|||||||
float * v0 = GET_ARR_RAW(jrv0,CENTER_LOC);
|
float * v0 = GET_ARR_RAW(jrv0,CENTER_LOC);
|
||||||
float * w0 = GET_ARR_RAW(jrw0,CENTER_LOC);
|
float * w0 = GET_ARR_RAW(jrw0,CENTER_LOC);
|
||||||
|
|
||||||
__m256 aScalar = _mm256_set1_ps(a);
|
|
||||||
__m256 cScalar = _mm256_set1_ps(c);
|
|
||||||
|
|
||||||
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
|
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
|
||||||
//transform u direction
|
//transform u direction
|
||||||
for(k=1; k<DIM-1; k++){
|
solver_gauss_seidel_iterate_parallel(u,u0,a,c,DIM);
|
||||||
for(j=1; j<DIM-1; j++){
|
|
||||||
//solve as much as possible vectorized
|
|
||||||
for(i = 1; i < DIM-1; i=i+8){
|
|
||||||
__m256 vector = _mm256_loadu_ps(&u[IX(i-1,j,k)]);
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i+1,j,k)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j-1,k)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j+1,k)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k-1)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k+1)]));
|
|
||||||
vector = _mm256_mul_ps(vector,aScalar);
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u0[IX(i,j,k)]));
|
|
||||||
vector = _mm256_div_ps(vector,cScalar);
|
|
||||||
_mm256_storeu_ps(&u[IX(i,j,k)],vector);
|
|
||||||
}
|
|
||||||
//If there is any leftover, perform manual solving
|
|
||||||
if(i>DIM-1){
|
|
||||||
for(i=i-8; i < DIM-1; i++){
|
|
||||||
u[IX(i,j,k)] = (u0[IX(i,j,k)] + a*(u[IX(i-1,j,k)]+u[IX(i+1,j,k)]+u[IX(i,j-1,k)]+u[IX(i,j+1,k)]+u[IX(i,j,k-1)]+u[IX(i,j,k+1)]))/c;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//transform v direction
|
//transform v direction
|
||||||
for(k=1; k<DIM-1; k++){
|
solver_gauss_seidel_iterate_parallel(v,v0,a,c,DIM);
|
||||||
for(j=1; j<DIM-1; j++){
|
|
||||||
//solve as much as possible vectorized
|
|
||||||
for(i = 1; i < DIM-1; i=i+8){
|
|
||||||
__m256 vector = _mm256_loadu_ps(&v[IX(i-1,j,k)]);
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i+1,j,k)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j+1,k)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k-1)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k+1)]));
|
|
||||||
vector = _mm256_mul_ps(vector,aScalar);
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v0[IX(i,j,k)]));
|
|
||||||
vector = _mm256_div_ps(vector,cScalar);
|
|
||||||
_mm256_storeu_ps(&v[IX(i,j,k)],vector);
|
|
||||||
}
|
|
||||||
//If there is any leftover, perform manual solving
|
|
||||||
if(i>DIM-1){
|
|
||||||
for(i=i-8; i < DIM-1; i++){
|
|
||||||
v[IX(i,j,k)] = (v0[IX(i,j,k)] + a*(v[IX(i-1,j,k)]+v[IX(i+1,j,k)]+v[IX(i,j-1,k)]+v[IX(i,j+1,k)]+v[IX(i,j,k-1)]+v[IX(i,j,k+1)]))/c;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//transform w direction
|
//transform w direction
|
||||||
for(k=1; k<DIM-1; k++){
|
solver_gauss_seidel_iterate_parallel(w,w0,a,c,DIM);
|
||||||
for(j=1; j<DIM-1; j++){
|
|
||||||
//solve as much as possible vectorized
|
|
||||||
for(i = 1; i < DIM-1; i=i+8){
|
|
||||||
__m256 vector = _mm256_loadu_ps(&w[IX(i-1,j,k)]);
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i+1,j,k)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j-1,k)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j+1,k)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k+1)]));
|
|
||||||
vector = _mm256_mul_ps(vector,aScalar);
|
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w0[IX(i,j,k)]));
|
|
||||||
vector = _mm256_div_ps(vector,cScalar);
|
|
||||||
_mm256_storeu_ps(&w[IX(i,j,k)],vector);
|
|
||||||
}
|
|
||||||
//If there is any leftover, perform manual solving
|
|
||||||
if(i>DIM-1){
|
|
||||||
for(i=i-8; i < DIM-1; i++){
|
|
||||||
w[IX(i,j,k)] = (w0[IX(i,j,k)] + a*(w[IX(i-1,j,k)]+w[IX(i+1,j,k)]+w[IX(i,j-1,k)]+w[IX(i,j+1,k)]+w[IX(i,j,k-1)]+w[IX(i,j,k+1)]))/c;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//set bounds
|
//set bounds
|
||||||
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,u);
|
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,u);
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user