swap diffuse to use generic solver func
Some checks failed
studiorailgun/Renderer/pipeline/head: there was a failure building this commit

austin 2024-12-11 11:49:17 -05:00
parent 8aab5319b5
commit 5f565c457d
2 changed files with 5 additions and 99 deletions
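For context, the inline AVX loops removed in this commit all perform the same Gauss-Seidel relaxation of the diffusion system: each interior cell is set to (its source value plus a times the sum of its six neighbors) divided by c, repeated for FLUID_GRID2_LINEARSOLVERTIMES sweeps. The generic routine that replaces them, solver_gauss_seidel_iterate_parallel(x, x0, a, c, DIM), presumably performs that same update. The following is only a minimal scalar sketch of the relaxation for reference; the names gauss_seidel_iterate_sketch and ix, and the explicit iterations parameter, are assumptions made here and are not taken from math/ode/gauss_seidel.h, which is not shown in this diff.

#include <stddef.h>

/* Hypothetical flattened index for a dim^3 grid; the project's IX macro
 * presumably does something equivalent, but its exact layout is assumed. */
static inline size_t ix(int i, int j, int k, int dim){
    return (size_t)i + (size_t)dim * ((size_t)j + (size_t)dim * (size_t)k);
}

/* Minimal scalar sketch of the relaxation the removed loops implemented:
 * x[cell] = (x0[cell] + a * (sum of the six neighbors of cell in x)) / c,
 * swept over the interior a fixed number of times. The real routine is
 * parallelized/vectorized and may handle the iteration count differently. */
static void gauss_seidel_iterate_sketch(float * x, const float * x0, float a, float c, int dim, int iterations){
    for(int l = 0; l < iterations; l++){
        for(int k = 1; k < dim - 1; k++){
            for(int j = 1; j < dim - 1; j++){
                for(int i = 1; i < dim - 1; i++){
                    x[ix(i,j,k,dim)] = (x0[ix(i,j,k,dim)] + a * (
                        x[ix(i-1,j,k,dim)] + x[ix(i+1,j,k,dim)] +
                        x[ix(i,j-1,k,dim)] + x[ix(i,j+1,k,dim)] +
                        x[ix(i,j,k-1,dim)] + x[ix(i,j,k+1,dim)])) / c;
                }
            }
        }
    }
}

Collapsing the three near-identical AVX blocks in the velocity diffuse into one such call per component (u, v, w) is what the diff below does; only the boundary handling stays in the fluid-specific functions.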

View File

@@ -9,6 +9,7 @@
#include "fluid/sim/grid2/solver_consts.h"
#include "fluid/sim/grid2/utilities.h"
#include "math/ode/multigrid.h"
#include "math/ode/gauss_seidel.h"
/**
@@ -49,35 +50,9 @@ LIBRARY_API void fluid_grid2_solveDiffuseDensity(
    float * x = GET_ARR_RAW(d,CENTER_LOC);
    float * x0 = GET_ARR_RAW(d0,CENTER_LOC);
    __m256 aScalar = _mm256_set1_ps(a);
    __m256 cScalar = _mm256_set1_ps(c);
    for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
        //iterate
        for(k=1; k<DIM-1; k++){
            for(j=1; j<DIM-1; j++){
                int n = 0;
                //solve as much as possible vectorized
                for(i = 1; i < DIM-1; i=i+8){
                    __m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j-1,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j+1,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k-1)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k+1)]));
                    vector = _mm256_mul_ps(vector,aScalar);
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[IX(i,j,k)]));
                    vector = _mm256_div_ps(vector,cScalar);
                    _mm256_storeu_ps(&x[IX(i,j,k)],vector);
                }
                //If there is any leftover, perform manual solving
                if(i>DIM-1){
                    for(i=i-8; i < DIM-1; i++){
                        x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
                    }
                }
            }
        }
    }
    solver_gauss_seidel_iterate_parallel(x,x0,a,c,DIM);
    //set bounds
    fluid_grid2_set_bounds(0,x);

View File

@@ -58,84 +58,15 @@ LIBRARY_API void fluid_grid2_solveVectorDiffuse (
    float * v0 = GET_ARR_RAW(jrv0,CENTER_LOC);
    float * w0 = GET_ARR_RAW(jrw0,CENTER_LOC);
    __m256 aScalar = _mm256_set1_ps(a);
    __m256 cScalar = _mm256_set1_ps(c);
    for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
        //transform u direction
        for(k=1; k<DIM-1; k++){
            for(j=1; j<DIM-1; j++){
                //solve as much as possible vectorized
                for(i = 1; i < DIM-1; i=i+8){
                    __m256 vector = _mm256_loadu_ps(&u[IX(i-1,j,k)]);
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i+1,j,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j-1,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j+1,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k-1)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u[IX(i,j,k+1)]));
                    vector = _mm256_mul_ps(vector,aScalar);
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&u0[IX(i,j,k)]));
                    vector = _mm256_div_ps(vector,cScalar);
                    _mm256_storeu_ps(&u[IX(i,j,k)],vector);
                }
                //If there is any leftover, perform manual solving
                if(i>DIM-1){
                    for(i=i-8; i < DIM-1; i++){
                        u[IX(i,j,k)] = (u0[IX(i,j,k)] + a*(u[IX(i-1,j,k)]+u[IX(i+1,j,k)]+u[IX(i,j-1,k)]+u[IX(i,j+1,k)]+u[IX(i,j,k-1)]+u[IX(i,j,k+1)]))/c;
                    }
                }
            }
        }
    }
    solver_gauss_seidel_iterate_parallel(u,u0,a,c,DIM);
        //transform v direction
        for(k=1; k<DIM-1; k++){
            for(j=1; j<DIM-1; j++){
                //solve as much as possible vectorized
                for(i = 1; i < DIM-1; i=i+8){
                    __m256 vector = _mm256_loadu_ps(&v[IX(i-1,j,k)]);
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i+1,j,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j+1,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k-1)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v[IX(i,j,k+1)]));
                    vector = _mm256_mul_ps(vector,aScalar);
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&v0[IX(i,j,k)]));
                    vector = _mm256_div_ps(vector,cScalar);
                    _mm256_storeu_ps(&v[IX(i,j,k)],vector);
                }
                //If there is any leftover, perform manual solving
                if(i>DIM-1){
                    for(i=i-8; i < DIM-1; i++){
                        v[IX(i,j,k)] = (v0[IX(i,j,k)] + a*(v[IX(i-1,j,k)]+v[IX(i+1,j,k)]+v[IX(i,j-1,k)]+v[IX(i,j+1,k)]+v[IX(i,j,k-1)]+v[IX(i,j,k+1)]))/c;
                    }
                }
            }
        }
    solver_gauss_seidel_iterate_parallel(v,v0,a,c,DIM);
        //transform w direction
        for(k=1; k<DIM-1; k++){
            for(j=1; j<DIM-1; j++){
                //solve as much as possible vectorized
                for(i = 1; i < DIM-1; i=i+8){
                    __m256 vector = _mm256_loadu_ps(&w[IX(i-1,j,k)]);
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i+1,j,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j-1,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j+1,k)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w[IX(i,j,k+1)]));
                    vector = _mm256_mul_ps(vector,aScalar);
                    vector = _mm256_add_ps(vector,_mm256_loadu_ps(&w0[IX(i,j,k)]));
                    vector = _mm256_div_ps(vector,cScalar);
                    _mm256_storeu_ps(&w[IX(i,j,k)],vector);
                }
                //If there is any leftover, perform manual solving
                if(i>DIM-1){
                    for(i=i-8; i < DIM-1; i++){
                        w[IX(i,j,k)] = (w0[IX(i,j,k)] + a*(w[IX(i-1,j,k)]+w[IX(i+1,j,k)]+w[IX(i,j-1,k)]+w[IX(i,j+1,k)]+w[IX(i,j,k-1)]+w[IX(i,j,k+1)]))/c;
                    }
                }
            }
        }
    solver_gauss_seidel_iterate_parallel(w,w0,a,c,DIM);
    //set bounds
    fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,u);