59 lines
2.0 KiB
C
59 lines
2.0 KiB
C
#include <jni.h>
|
|
#include <immintrin.h>
|
|
#include <stdint.h>
|
|
#include "includes/utilities.h"
|
|
|
|
|
|
|
|
/**
 * JNI entry point for electrosphere.FluidSim.linSolve.
 *
 * Resolves the native addresses behind the two buffer objects and forwards
 * everything to lin_solve.
 *
 * @param env            JNI environment
 * @param this           the calling Java object (unused)
 * @param chunk_mask_raw forwarded to lin_solve as chunk_mask
 * @param DIM_X          forwarded to lin_solve as N
 * @param br             forwarded to lin_solve as b
 * @param jrx            buffer object backing x; must be a direct buffer
 * @param jrx0           buffer object backing x0; must be a direct buffer
 * @param ar             forwarded to lin_solve as a
 * @param cr             forwarded to lin_solve as c
 *
 * If either buffer is null or has no native address, an
 * IllegalArgumentException is raised and no solving is performed.
 */
JNIEXPORT void JNICALL Java_electrosphere_FluidSim_linSolve(
    JNIEnv * env,
    jobject this,
    jint chunk_mask_raw,
    jint DIM_X,
    jint br,
    jobject jrx,
    jobject jrx0,
    jfloat ar,
    jfloat cr
){
    (void)this;

    //adapt object types
    float * x = NULL;
    float * x0 = NULL;
    if(jrx != NULL && jrx0 != NULL){
        x = (*env)->GetDirectBufferAddress(env,jrx);
        x0 = (*env)->GetDirectBufferAddress(env,jrx0);
    }

    //GetDirectBufferAddress yields NULL for non-direct buffers; dereferencing
    //that in native code would take down the whole JVM, so fail in Java instead
    if(x == NULL || x0 == NULL){
        jclass exClass = (*env)->FindClass(env,"java/lang/IllegalArgumentException");
        //if FindClass failed an exception is already pending; just return
        if(exClass != NULL){
            (*env)->ThrowNew(env,exClass,"linSolve requires two non-null direct buffers");
        }
        return;
    }

    //solve
    lin_solve(env,chunk_mask_raw,DIM_X,br,x,x0,ar,cr);
}
|
|
|
|
/**
|
|
* Solves a linear system of equations in a vectorized manner
|
|
*/
|
|
void lin_solve(JNIEnv * env, uint32_t chunk_mask, int N, int b, float* x, float* x0, float a, float c){
|
|
int i, j, k, l, m;
|
|
__m256 aScalar = _mm256_set1_ps(a);
|
|
__m256 cScalar = _mm256_set1_ps(c);
|
|
// update for each cell
|
|
for(k=1; k<N-1; k++){
|
|
for(j=1; j<N-1; j++){
|
|
int n = 0;
|
|
//solve as much as possible vectorized
|
|
for(i = 1; i < N-1; i=i+8){
|
|
__m256 vector = _mm256_loadu_ps(&x[IX(i-1,j,k)]);
|
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i+1,j,k)]));
|
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j-1,k)]));
|
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j+1,k)]));
|
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k-1)]));
|
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x[IX(i,j,k+1)]));
|
|
vector = _mm256_mul_ps(vector,aScalar);
|
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&x0[IX(i,j,k)]));
|
|
vector = _mm256_div_ps(vector,cScalar);
|
|
_mm256_storeu_ps(&x[IX(i,j,k)],vector);
|
|
}
|
|
//If there is any leftover, perform manual solving
|
|
if(i>N-1){
|
|
for(i=i-8; i < N-1; i++){
|
|
x[IX(i,j,k)] = (x0[IX(i,j,k)] + a*(x[IX(i-1,j,k)]+x[IX(i+1,j,k)]+x[IX(i,j-1,k)]+x[IX(i,j+1,k)]+x[IX(i,j,k-1)]+x[IX(i,j,k+1)]))/c;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} |