fluid-sim/src/main/c/linearsolver.c
2023-07-23 12:20:14 -04:00

59 lines
2.0 KiB
C

#include <jni.h>
#include <immintrin.h>
#include <stdint.h>
#include "includes/utilities.h"
/**
 * JNI entry point for the native method linSolve of electrosphere.FluidSim.
 * Resolves the two buffer arguments to raw float pointers and forwards
 * everything to lin_solve.
 *
 * @param env            JNI environment of the calling thread
 * @param this           the object the native method was invoked on (unused)
 * @param chunk_mask_raw forwarded unchanged as lin_solve's chunk_mask
 * @param DIM_X          forwarded unchanged as lin_solve's N
 * @param br             forwarded unchanged as lin_solve's b
 * @param jrx            direct buffer used as lin_solve's x (written in place)
 * @param jrx0           direct buffer used as lin_solve's x0
 * @param ar             forwarded unchanged as lin_solve's a
 * @param cr             forwarded unchanged as lin_solve's c
 *
 * If either buffer has no direct address, a java.lang.IllegalArgumentException
 * is raised in the caller and the solver is not run.
 */
JNIEXPORT void JNICALL Java_electrosphere_FluidSim_linSolve(
    JNIEnv * env,
    jobject this,
    jint chunk_mask_raw,
    jint DIM_X,
    jint br,
    jobject jrx,
    jobject jrx0,
    jfloat ar,
    jfloat cr
){
    //adapt object types
    float * x = (*env)->GetDirectBufferAddress(env,jrx);
    float * x0 = (*env)->GetDirectBufferAddress(env,jrx0);

    //GetDirectBufferAddress yields a null pointer for non-direct buffers (or when the VM
    //does not support direct access); dereferencing it in the solver would take down the JVM
    if(!x || !x0){
        jclass exceptionClass = (*env)->FindClass(env,"java/lang/IllegalArgumentException");
        //if the lookup itself failed an exception is already pending, so just return
        if(exceptionClass){
            (*env)->ThrowNew(env,exceptionClass,"linSolve requires direct buffers for both x and x0");
        }
        return;
    }

    //solve
    lin_solve(env,chunk_mask_raw,DIM_X,br,x,x0,ar,cr);
}
/**
 * Performs one in-place relaxation sweep over the interior cells
 * (indices 1 .. N-2 on each of the three axes) of x:
 *
 *   x[i,j,k] = (x0[i,j,k] + a * (x[i-1,j,k] + x[i+1,j,k]
 *                               + x[i,j-1,k] + x[i,j+1,k]
 *                               + x[i,j,k-1] + x[i,j,k+1])) / c
 *
 * Each row is processed in groups of 8 consecutive i values with AVX
 * unaligned loads/stores; whatever does not fill a whole group of 8 is
 * finished with the scalar form of the same formula. The vector path only
 * runs while all 8 lanes lie inside the interior, so boundary cells and
 * neighbouring rows are never written.
 *
 * Because a whole group is loaded before it is stored, the i-1 neighbour of
 * lanes 1..7 is the value from before this group's update, whereas the scalar
 * tail always sees the freshly written i-1 value.
 *
 * NOTE(review): IX comes from includes/utilities.h and is not visible here.
 * The 8-wide loads assume IX(i+1,j,k) == IX(i,j,k)+1 and that IX only depends
 * on its arguments and N -- confirm against the macro.
 *
 * @param env        JNI environment (not used by the solver)
 * @param chunk_mask not used by the solver
 * @param N          grid extent per axis, including the two boundary cells
 * @param b          not used by the solver
 * @param x          field being solved for; read and overwritten in place
 * @param x0         source term; only read
 * @param a          weight applied to the sum of the six neighbours
 * @param c          divisor applied to the result
 */
void lin_solve(JNIEnv * env, uint32_t chunk_mask, int N, int b, float* x, float* x0, float a, float c){
    int i, j, k;
    //one past the last interior index on every axis
    const int interiorEnd = N - 1;
    __m256 aScalar = _mm256_set1_ps(a);
    __m256 cScalar = _mm256_set1_ps(c);

    //part of the shared solver signature, intentionally unused here
    (void)env;
    (void)chunk_mask;
    (void)b;

    // update for each cell
    for(k=1; k<interiorEnd; k++){
        for(j=1; j<interiorEnd; j++){
            //solve as much as possible vectorized
            //a group covers i .. i+7, so it is only taken while i+7 is still an interior index
            for(i = 1; i <= interiorEnd-8; i=i+8){
                __m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
                vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
                vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j-1,k)]));
                vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j+1,k)]));
                vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k-1)]));
                vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k+1)]));
                vector = _mm256_mul_ps(vector,aScalar);
                vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[IX(i,j,k)]));
                vector = _mm256_div_ps(vector,cScalar);
                _mm256_storeu_ps(&x[IX(i,j,k)],vector);
            }
            //If there is any leftover, perform manual solving
            //i already points at the first cell the vector loop did not cover
            for(; i < interiorEnd; i++){
                x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
            }
        }
    }
}