Optimizations of fluid simulation
This commit is contained in:
parent
25b74c7f93
commit
5cbb1a0ee0
@ -4,6 +4,7 @@ LIB_ENDING=".so"
|
|||||||
BASE_INCLUDE_DIR=""
|
BASE_INCLUDE_DIR=""
|
||||||
OS_INCLUDE_DIR=""
|
OS_INCLUDE_DIR=""
|
||||||
|
|
||||||
|
#determine os
|
||||||
if [[ "$OSTYPE" == "linux-gnu"* ]]; then
|
if [[ "$OSTYPE" == "linux-gnu"* ]]; then
|
||||||
#linux
|
#linux
|
||||||
LIB_ENDING=".so"
|
LIB_ENDING=".so"
|
||||||
@ -40,7 +41,7 @@ rm -f ./*.dll
|
|||||||
|
|
||||||
|
|
||||||
#compile object files
|
#compile object files
|
||||||
COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2"
|
COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2 -O1"
|
||||||
INPUT_FILES="./fluidsim.c"
|
INPUT_FILES="./fluidsim.c"
|
||||||
OUTPUT_FILE="./fluidsim.o"
|
OUTPUT_FILE="./fluidsim.o"
|
||||||
gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE
|
gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE
|
||||||
|
|||||||
@ -6,12 +6,10 @@
|
|||||||
#define SWAP(x0,x) {float *tmp=x0;x0=x;x=tmp;}
|
#define SWAP(x0,x) {float *tmp=x0;x0=x;x=tmp;}
|
||||||
#define IX(i,j,k) ((i)+(N)*(j)+(N*N)*(k))
|
#define IX(i,j,k) ((i)+(N)*(j)+(N*N)*(k))
|
||||||
|
|
||||||
#define LINEARSOLVERTIMES 5
|
#define LINEARSOLVERTIMES 10
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//https://docs.oracle.com/javase/1.5.0/docs/guide/jni/spec/functions.html
|
|
||||||
|
|
||||||
void diffuse(int N, int b, float * x, float * x0, float diff, float dt);
|
void diffuse(int N, int b, float * x, float * x0, float diff, float dt);
|
||||||
void advect(int N, int b, float * d, float * d0, float * u, float * v, float * w, float dt);
|
void advect(int N, int b, float * d, float * d0, float * u, float * v, float * w, float dt);
|
||||||
void project(int N, float * u, float * v, float * w, float * p, float * div);
|
void project(int N, float * u, float * v, float * w, float * p, float * div);
|
||||||
@ -20,6 +18,9 @@ void dens_step(int N, float * x, float * x0, float * u, float * v, float * w, fl
|
|||||||
void vel_step(int N, float * u, float * v, float * w, float * u0, float * v0, float * w0, float visc, float dt);
|
void vel_step(int N, float * u, float * v, float * w, float * u0, float * v0, float * w0, float visc, float dt);
|
||||||
void lin_solve(int N, int b, float* x, float* x0, float a, float c);
|
void lin_solve(int N, int b, float* x, float* x0, float a, float c);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The core simulation function
|
||||||
|
*/
|
||||||
JNIEXPORT void JNICALL Java_electrosphere_FluidSim_simulate(
|
JNIEXPORT void JNICALL Java_electrosphere_FluidSim_simulate(
|
||||||
JNIEnv * env,
|
JNIEnv * env,
|
||||||
jobject this,
|
jobject this,
|
||||||
@ -48,15 +49,13 @@ JNIEXPORT void JNICALL Java_electrosphere_FluidSim_simulate(
|
|||||||
float * w0 = (*env)->GetDirectBufferAddress(env,jw0);
|
float * w0 = (*env)->GetDirectBufferAddress(env,jw0);
|
||||||
int N = DIM_X;
|
int N = DIM_X;
|
||||||
int i,j,k;
|
int i,j,k;
|
||||||
// for ( i=1 ; i<N-1 ; i++ ) { for ( j=1 ; j<N-1 ; j++ ) { for ( k=1 ; k<N-1 ; k++ ) {
|
|
||||||
// if(v[IX(i,j,k)] < -0.5){
|
|
||||||
// printf("%d %d %d %f \n",i,j,k,v[IX(i,j,k)]);
|
|
||||||
// }
|
|
||||||
// }}}
|
|
||||||
vel_step(DIM_X, u, v, w, u0, v0, w0, VISCOSITY_RATE, timestep);
|
vel_step(DIM_X, u, v, w, u0, v0, w0, VISCOSITY_RATE, timestep);
|
||||||
dens_step(DIM_X, x, x0, u, v, w, DIFFUSION_RATE, timestep);
|
dens_step(DIM_X, x, x0, u, v, w, DIFFUSION_RATE, timestep);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds values from a source array to a current frame array (eg more density to the main density array)
|
||||||
|
*/
|
||||||
void add_source(int N, float * x, float * s, float dt){
|
void add_source(int N, float * x, float * s, float dt){
|
||||||
int i;
|
int i;
|
||||||
int size=N*N*N;
|
int size=N*N*N;
|
||||||
@ -65,68 +64,79 @@ void add_source(int N, float * x, float * s, float dt){
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Diffuses a given array by a diffusion constant
|
||||||
|
*/
|
||||||
void diffuse(int N, int b, float * x, float * x0, float diff, float dt){
|
void diffuse(int N, int b, float * x, float * x0, float diff, float dt){
|
||||||
float a=dt*diff*N*N*N;
|
float a=dt*diff*N*N*N;
|
||||||
lin_solve(N, b, x, x0, a, 1+6*a);
|
lin_solve(N, b, x, x0, a, 1+6*a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Advects a given array based on the force vectors in the simulation
|
||||||
|
*/
|
||||||
void advect(int N, int b, float * d, float * d0, float * u, float * v, float * w, float dt){
|
void advect(int N, int b, float * d, float * d0, float * u, float * v, float * w, float dt){
|
||||||
int i, j, k, i0, j0, k0, i1, j1, k1;
|
int i, j, k, i0, j0, k0, i1, j1, k1;
|
||||||
float x, y, z, s0, t0, s1, t1, u1, u0, dtx,dty,dtz;
|
float x, y, z, s0, t0, s1, t1, u1, u0, dtx,dty,dtz;
|
||||||
|
|
||||||
dtx=dty=dtz=dt*N;
|
dtx=dty=dtz=dt*N;
|
||||||
|
|
||||||
for ( i=1 ; i<N-1 ; i++ ) { for ( j=1 ; j<N-1 ; j++ ) { for ( k=1 ; k<N-1 ; k++ ) {
|
for(k=1; k<N-1; k++){
|
||||||
x = i-dtx*u[IX(i,j,k)]; y = j-dty*v[IX(i,j,k)]; z = k-dtz*w[IX(i,j,k)];
|
for(j=1; j<N-1; j++){
|
||||||
if (x<0.5f) x=0.5f; if (x>N+0.5f) x=N+0.5f; i0=(int)x; i1=i0+1;
|
for(i=1; i<N-1; i++){
|
||||||
if (y<0.5f) y=0.5f; if (y>N+0.5f) y=N+0.5f; j0=(int)y; j1=j0+1;
|
x = i-dtx*u[IX(i,j,k)]; y = j-dty*v[IX(i,j,k)]; z = k-dtz*w[IX(i,j,k)];
|
||||||
if (z<0.5f) z=0.5f; if (z>N+0.5f) z=N+0.5f; k0=(int)z; k1=k0+1;
|
if (x<0.5f) x=0.5f; if (x>N+0.5f) x=N+0.5f; i0=(int)x; i1=i0+1;
|
||||||
|
if (y<0.5f) y=0.5f; if (y>N+0.5f) y=N+0.5f; j0=(int)y; j1=j0+1;
|
||||||
|
if (z<0.5f) z=0.5f; if (z>N+0.5f) z=N+0.5f; k0=(int)z; k1=k0+1;
|
||||||
|
|
||||||
s1 = x-i0; s0 = 1-s1; t1 = y-j0; t0 = 1-t1; u1 = z-k0; u0 = 1-u1;
|
s1 = x-i0; s0 = 1-s1; t1 = y-j0; t0 = 1-t1; u1 = z-k0; u0 = 1-u1;
|
||||||
if(i0 >= N){
|
if(i0 >= N){
|
||||||
i0 = N - 1;
|
i0 = N - 1;
|
||||||
|
}
|
||||||
|
// if(i0 < 0){
|
||||||
|
// i0 = 0;
|
||||||
|
// }
|
||||||
|
if(j0 >= N){
|
||||||
|
j0 = N - 1;
|
||||||
|
}
|
||||||
|
// if(j0 < 0){
|
||||||
|
// j0 = 0;
|
||||||
|
// }
|
||||||
|
if(k0 >= N){
|
||||||
|
k0 = N - 1;
|
||||||
|
}
|
||||||
|
// if(k0 < 0){
|
||||||
|
// k0 = 0;
|
||||||
|
// }
|
||||||
|
if(i1 >= N){
|
||||||
|
i1 = N - 1;
|
||||||
|
}
|
||||||
|
// if(i1 < 0){
|
||||||
|
// i1 = 0;
|
||||||
|
// }
|
||||||
|
if(j1 >= N){
|
||||||
|
j1 = N - 1;
|
||||||
|
}
|
||||||
|
// if(j1 < 0){
|
||||||
|
// j1 = 0;
|
||||||
|
// }
|
||||||
|
if(k1 >= N){
|
||||||
|
k1 = N - 1;
|
||||||
|
}
|
||||||
|
// if(k1 < 0){
|
||||||
|
// k1 = 0;
|
||||||
|
// }
|
||||||
|
d[IX(i,j,k)] = s0*(t0*u0*d0[IX(i0,j0,k0)]+t1*u0*d0[IX(i0,j1,k0)]+t0*u1*d0[IX(i0,j0,k1)]+t1*u1*d0[IX(i0,j1,k1)])+
|
||||||
|
s1*(t0*u0*d0[IX(i1,j0,k0)]+t1*u0*d0[IX(i1,j1,k0)]+t0*u1*d0[IX(i1,j0,k1)]+t1*u1*d0[IX(i1,j1,k1)]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// if(i0 < 0){
|
}
|
||||||
// i0 = 0;
|
|
||||||
// }
|
|
||||||
if(j0 >= N){
|
|
||||||
j0 = N - 1;
|
|
||||||
}
|
|
||||||
// if(j0 < 0){
|
|
||||||
// j0 = 0;
|
|
||||||
// }
|
|
||||||
if(k0 >= N){
|
|
||||||
k0 = N - 1;
|
|
||||||
}
|
|
||||||
// if(k0 < 0){
|
|
||||||
// k0 = 0;
|
|
||||||
// }
|
|
||||||
if(i1 >= N){
|
|
||||||
i1 = N - 1;
|
|
||||||
}
|
|
||||||
// if(i1 < 0){
|
|
||||||
// i1 = 0;
|
|
||||||
// }
|
|
||||||
if(j1 >= N){
|
|
||||||
j1 = N - 1;
|
|
||||||
}
|
|
||||||
// if(j1 < 0){
|
|
||||||
// j1 = 0;
|
|
||||||
// }
|
|
||||||
if(k1 >= N){
|
|
||||||
k1 = N - 1;
|
|
||||||
}
|
|
||||||
// if(k1 < 0){
|
|
||||||
// k1 = 0;
|
|
||||||
// }
|
|
||||||
d[IX(i,j,k)] = s0*(t0*u0*d0[IX(i0,j0,k0)]+t1*u0*d0[IX(i0,j1,k0)]+t0*u1*d0[IX(i0,j0,k1)]+t1*u1*d0[IX(i0,j1,k1)])+
|
|
||||||
s1*(t0*u0*d0[IX(i1,j0,k0)]+t1*u0*d0[IX(i1,j1,k0)]+t0*u1*d0[IX(i1,j0,k1)]+t1*u1*d0[IX(i1,j1,k1)]);
|
|
||||||
}}}
|
|
||||||
set_bnd(N, b, d);
|
set_bnd(N, b, d);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The main density step function
|
||||||
|
*/
|
||||||
void dens_step(int N, float * x, float * x0, float * u, float * v, float * w, float diff, float dt){
|
void dens_step(int N, float * x, float * x0, float * u, float * v, float * w, float diff, float dt){
|
||||||
add_source(N, x, x0, dt);
|
add_source(N, x, x0, dt);
|
||||||
SWAP(x0, x);
|
SWAP(x0, x);
|
||||||
@ -135,6 +145,9 @@ void dens_step(int N, float * x, float * x0, float * u, float * v, float * w, fl
|
|||||||
advect(N, 0, x, x0, u, v, w, dt);
|
advect(N, 0, x, x0, u, v, w, dt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The main velocity step function
|
||||||
|
*/
|
||||||
void vel_step(int N, float * u, float * v, float * w, float * u0, float * v0, float * w0, float visc, float dt){
|
void vel_step(int N, float * u, float * v, float * w, float * u0, float * v0, float * w0, float visc, float dt){
|
||||||
add_source(N, u, u0, dt);
|
add_source(N, u, u0, dt);
|
||||||
add_source(N, v, v0, dt);
|
add_source(N, v, v0, dt);
|
||||||
@ -155,15 +168,68 @@ void vel_step(int N, float * u, float * v, float * w, float * u0, float * v0, fl
|
|||||||
project(N, u, v, w, u0, v0);
|
project(N, u, v, w, u0, v0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//used for temporary vector storage when appropriate
|
||||||
float container[16];
|
float container[16];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Projects a given array based on force vectors
|
||||||
|
*/
|
||||||
void project(int N, float * u, float * v, float * w, float * p, float * div){
|
void project(int N, float * u, float * v, float * w, float * p, float * div){
|
||||||
int i, j, k;
|
int i, j, k;
|
||||||
|
|
||||||
for ( i=1 ; i<N-1 ; i++ ) { for ( j=1 ; j<N-1 ; j++ ) { for ( k=1 ; k<N-1 ; k++ ) {
|
__m256 nVector = _mm256_set1_ps(N);
|
||||||
div[IX(i,j,k)] = (float)(-1.0/3.0*((u[IX(i+1,j,k)]-u[IX(i-1,j,k)])/N+(v[IX(i,j+1,k)]-v[IX(i,j-1,k)])/N+(w[IX(i,j,k+1)]-w[IX(i,j,k-1)])/N));
|
__m256 constScalar = _mm256_set1_ps(-1.0/3.0);
|
||||||
p[IX(i,j,k)] = 0;
|
__m256 zeroVec = _mm256_set1_ps(0);
|
||||||
}}}
|
__m256 vector, vector2, vector3;
|
||||||
|
|
||||||
|
for(k=1; k<N-1; k++){
|
||||||
|
for(j=1; j<N-1; j++){
|
||||||
|
i = 1;
|
||||||
|
//
|
||||||
|
//lower
|
||||||
|
//
|
||||||
|
//first part
|
||||||
|
vector = _mm256_loadu_ps(&u[IX(i+1,j,k)]);
|
||||||
|
vector = _mm256_sub_ps(vector,_mm256_loadu_ps(&u[IX(i-1,j,k)]));
|
||||||
|
vector = _mm256_div_ps(vector,nVector);
|
||||||
|
//second part
|
||||||
|
vector2 = _mm256_loadu_ps(&v[IX(i,j+1,k)]);
|
||||||
|
vector2 = _mm256_sub_ps(vector2,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
|
||||||
|
vector2 = _mm256_div_ps(vector2,nVector);
|
||||||
|
//third part
|
||||||
|
vector3 = _mm256_loadu_ps(&w[IX(i,j,k+1)]);
|
||||||
|
vector3 = _mm256_sub_ps(vector3,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
|
||||||
|
vector3 = _mm256_div_ps(vector3,nVector);
|
||||||
|
//multiply and finalize
|
||||||
|
vector = _mm256_add_ps(vector,_mm256_add_ps(vector2,vector3));
|
||||||
|
vector = _mm256_mul_ps(vector,constScalar);
|
||||||
|
//store
|
||||||
|
_mm256_storeu_ps(&div[IX(i,j,k)],vector);
|
||||||
|
_mm256_storeu_ps(&p[IX(i,j,k)],zeroVec);
|
||||||
|
i = 9;
|
||||||
|
//
|
||||||
|
//upper
|
||||||
|
//
|
||||||
|
//first part
|
||||||
|
vector = _mm256_loadu_ps(&u[IX(i+1,j,k)]);
|
||||||
|
vector = _mm256_sub_ps(vector,_mm256_loadu_ps(&u[IX(i-1,j,k)]));
|
||||||
|
vector = _mm256_div_ps(vector,nVector);
|
||||||
|
//second part
|
||||||
|
vector2 = _mm256_loadu_ps(&v[IX(i,j+1,k)]);
|
||||||
|
vector2 = _mm256_sub_ps(vector2,_mm256_loadu_ps(&v[IX(i,j-1,k)]));
|
||||||
|
vector2 = _mm256_div_ps(vector2,nVector);
|
||||||
|
//third part
|
||||||
|
vector3 = _mm256_loadu_ps(&w[IX(i,j,k+1)]);
|
||||||
|
vector3 = _mm256_sub_ps(vector3,_mm256_loadu_ps(&w[IX(i,j,k-1)]));
|
||||||
|
vector3 = _mm256_div_ps(vector3,nVector);
|
||||||
|
//multiply and finalize
|
||||||
|
vector = _mm256_add_ps(vector,_mm256_add_ps(vector2,vector3));
|
||||||
|
vector = _mm256_mul_ps(vector,constScalar);
|
||||||
|
//store
|
||||||
|
_mm256_storeu_ps(&div[IX(i,j,k)],vector);
|
||||||
|
_mm256_storeu_ps(&p[IX(i,j,k)],zeroVec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
set_bnd(N, 0, div);
|
set_bnd(N, 0, div);
|
||||||
set_bnd(N, 0, p);
|
set_bnd(N, 0, p);
|
||||||
@ -171,9 +237,7 @@ void project(int N, float * u, float * v, float * w, float * p, float * div){
|
|||||||
lin_solve(N, 0, p, div, 1, 6);
|
lin_solve(N, 0, p, div, 1, 6);
|
||||||
|
|
||||||
|
|
||||||
__m256 constScalar = _mm256_set1_ps(0.5f*N);
|
constScalar = _mm256_set1_ps(0.5f*N);
|
||||||
__m256 vector;
|
|
||||||
__m256 vector2;
|
|
||||||
for ( k=1 ; k<N-1 ; k++ ) {
|
for ( k=1 ; k<N-1 ; k++ ) {
|
||||||
for ( j=1 ; j<N-1 ; j++ ) {
|
for ( j=1 ; j<N-1 ; j++ ) {
|
||||||
//
|
//
|
||||||
@ -235,11 +299,11 @@ void project(int N, float * u, float * v, float * w, float * p, float * div){
|
|||||||
set_bnd(N, 3, w);
|
set_bnd(N, 3, w);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Solves a linear system of equations in a vectorized manner
|
||||||
|
*/
|
||||||
void lin_solve(int N, int b, float* x, float* x0, float a, float c){
|
void lin_solve(int N, int b, float* x, float* x0, float a, float c){
|
||||||
int i, j, k, l, m;
|
int i, j, k, l, m;
|
||||||
|
|
||||||
|
|
||||||
__m256 aScalar = _mm256_set1_ps(a);
|
__m256 aScalar = _mm256_set1_ps(a);
|
||||||
__m256 cScalar = _mm256_set1_ps(c);
|
__m256 cScalar = _mm256_set1_ps(c);
|
||||||
// iterate the solver
|
// iterate the solver
|
||||||
@ -248,6 +312,7 @@ void lin_solve(int N, int b, float* x, float* x0, float a, float c){
|
|||||||
for(k=1; k<N-1; k++){
|
for(k=1; k<N-1; k++){
|
||||||
for(j=1; j<N-1; j++){
|
for(j=1; j<N-1; j++){
|
||||||
int n = 0;
|
int n = 0;
|
||||||
|
//solve as much as possible vectorized
|
||||||
for(i = 1; i < N-1; i=i+8){
|
for(i = 1; i < N-1; i=i+8){
|
||||||
__m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
|
__m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
|
||||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
|
||||||
@ -260,6 +325,7 @@ void lin_solve(int N, int b, float* x, float* x0, float a, float c){
|
|||||||
vector = _mm256_div_ps(vector,cScalar);
|
vector = _mm256_div_ps(vector,cScalar);
|
||||||
_mm256_storeu_ps(&x[IX(i,j,k)],vector);
|
_mm256_storeu_ps(&x[IX(i,j,k)],vector);
|
||||||
}
|
}
|
||||||
|
//If there is any leftover, perform manual solving
|
||||||
if(i>N-1){
|
if(i>N-1){
|
||||||
for(i=i-8; i < N-1; i++){
|
for(i=i-8; i < N-1; i++){
|
||||||
x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
|
x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
|
||||||
@ -271,7 +337,9 @@ void lin_solve(int N, int b, float* x, float* x0, float a, float c){
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the bounds of the simulation
|
||||||
|
*/
|
||||||
void set_bnd(int N, int b, float * target){
|
void set_bnd(int N, int b, float * target){
|
||||||
int DIM = N;
|
int DIM = N;
|
||||||
for(int x=1; x < DIM-1; x++){
|
for(int x=1; x < DIM-1; x++){
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user