Parallelize linear solver
This commit is contained in:
parent
10f3408f0f
commit
25b74c7f93
1
.gitignore
vendored
1
.gitignore
vendored
@ -7,3 +7,4 @@
|
||||
/.project
|
||||
/.vscode
|
||||
/shared-folder
|
||||
/shared-folder/**
|
||||
@ -54,6 +54,7 @@ INPUT_FILES="fluidsim.o"
|
||||
gcc $COMPILE_FLAGS $INPUT_FILES -o $OUTPUT_FILE
|
||||
|
||||
#move to resources
|
||||
mkdir -p ../../../shared-folder
|
||||
mv "./libfluidsim$LIB_ENDING" "../../../shared-folder/"
|
||||
|
||||
#clean compile dir
|
||||
|
||||
@ -6,7 +6,7 @@
|
||||
#define SWAP(x0,x) {float *tmp=x0;x0=x;x=tmp;}
|
||||
#define IX(i,j,k) ((i)+(N)*(j)+(N*N)*(k))
|
||||
|
||||
#define LINEARSOLVERTIMES 10
|
||||
#define LINEARSOLVERTIMES 5
|
||||
|
||||
|
||||
|
||||
@ -247,47 +247,24 @@ void lin_solve(int N, int b, float* x, float* x0, float a, float c){
|
||||
// update for each cell
|
||||
for(k=1; k<N-1; k++){
|
||||
for(j=1; j<N-1; j++){
|
||||
for(int i = 1; i < N-1; i++){
|
||||
x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
|
||||
int n = 0;
|
||||
for(i = 1; i < N-1; i=i+8){
|
||||
__m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j-1,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j+1,k)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k-1)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k+1)]));
|
||||
vector = _mm256_mul_ps(vector,aScalar);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[IX(i,j,k)]));
|
||||
vector = _mm256_div_ps(vector,cScalar);
|
||||
_mm256_storeu_ps(&x[IX(i,j,k)],vector);
|
||||
}
|
||||
if(i>N-1){
|
||||
for(i=i-8; i < N-1; i++){
|
||||
x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
|
||||
}
|
||||
}
|
||||
//solve first item on row
|
||||
// i=1;
|
||||
// x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
|
||||
|
||||
// // for(int n = 2; n<N-2;n++){
|
||||
// // x[IX(i,j,n)] = (a*(x[IX(i-1,j,n)]+x[IX(i+1,j,n)]+x[IX(i,j-1,n)]+x[IX(i,j+1,n)]+x[IX(i,j,n-1)]+x[IX(i,j,n+1)]));
|
||||
// // }
|
||||
// // if(x[IX(i,j,2)] > 0.0f){
|
||||
// // printf("%f, %f\n",x[IX(i,j,2)],x0[IX(i,j,2)]);
|
||||
// // }
|
||||
// //vectorize solve as many as can be packed into vector ops
|
||||
// //because we're operating on the innermost loop, guaranteed that k is align memory wise for all offsets
|
||||
// for(i=2; i+8<N-2;i=i+8){
|
||||
// // __m256 vector = _mm256_loadu_ps(&x[IX(i,j,k)]);
|
||||
// // __m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
|
||||
// // vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
|
||||
// // vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j-1,k)]));
|
||||
// // vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j+1,k)]));
|
||||
// // vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k-1)]));
|
||||
// // vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k+1)]));
|
||||
// // vector = _mm256_mul_ps(vector,aScalar);
|
||||
// // vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[IX(i,j,k)]));
|
||||
// // vector = _mm256_div_ps(vector,cScalar);
|
||||
// // _mm256_storeu_ps(&x[IX(i,j,k)],vector);
|
||||
// // x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
|
||||
// }
|
||||
// // if(x[IX(i,j,2)] > 0.0f){
|
||||
// // printf("%f, %f\n",x[IX(i,j,2)],x0[IX(i,j,2)]);
|
||||
// // }
|
||||
// for(int n = 2; n<i; n++){
|
||||
// // x[IX(i,j,n)] = (
|
||||
// // (x[IX(i,j,n)] + x[IX(i,j,n-1)] + x[IX(i,j,n+1)])
|
||||
// // * a + x0[IX(i,j,n)])/ c;
|
||||
// x[IX(i,j,n)] = (x0[IX(i,j,n)] + a*(x[IX(i-1,j,n)]+x[IX(i+1,j,n)]+x[IX(i,j-1,n)]+x[IX(i,j+1,n)]+x[IX(i,j,n-1)]+x[IX(i,j,n+1)]))/c;
|
||||
// }
|
||||
// for(; i<N-1; i++){
|
||||
// x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
|
||||
// }
|
||||
}
|
||||
}
|
||||
set_bnd(N, b, x);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user