diff --git a/src/main/c/compile.sh b/src/main/c/compile.sh index 7de381a..623d48a 100644 --- a/src/main/c/compile.sh +++ b/src/main/c/compile.sh @@ -41,22 +41,22 @@ rm -f ./*.dll #compile object files -COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2 -Ofast" +COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2 -march=native -Ofast -msse -msse2 -msse3 -mmmx -m3dnow" INPUT_FILES="./densitystep.c" OUTPUT_FILE="./densitystep.o" gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE -COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2 -Ofast" +COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2 -march=native -Ofast -msse -msse2 -msse3 -mmmx -m3dnow" INPUT_FILES="./velocitystep.c" OUTPUT_FILE="./velocitystep.o" gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE -COMPILE_FLAGS="-c -fPIC -m64 -Ofast" +COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2 -march=native -Ofast -msse -msse2 -msse3 -mmmx -m3dnow" INPUT_FILES="./chunkmask.c" OUTPUT_FILE="./chunkmask.o" gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE -COMPILE_FLAGS="-c -fPIC -m64 -Ofast" +COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2 -march=native -Ofast -msse -msse2 -msse3 -mmmx -m3dnow" INPUT_FILES="./fluidsim.c" OUTPUT_FILE="./fluidsim.o" gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE diff --git a/src/main/c/velocitystep.c b/src/main/c/velocitystep.c index 2368039..3f40535 100644 --- a/src/main/c/velocitystep.c +++ b/src/main/c/velocitystep.c @@ -738,9 +738,20 @@ void copyNeighborsRaw // PLANES // // + // __m512 transferVector;// = _mm512_set1_ps(0.5*N); + + //__m256 vector = _mm256_loadu_ps(&p[IX(i-1,j,k)]); + //vector = _mm256_add_ps(vector,_mm256_loadu_ps(&p[IX(i+1,j,k)])); + //vector = _mm256_add_ps(vector,_mm256_loadu_ps(&p[IX(i,j-1,k)])); + //_mm256_storeu_ps(&p[IX(i,j,k)],vector); + //__m256 + //_mm256_loadu_ps + //_mm256_storeu_ps if(ARR_EXISTS(chunk_mask,0,1,1)){ source = GET_ARR_RAW(env,neighborArray,CK(0,1,1)); for(int x=1; x < DIM-1; x++){ + // transferVector = _mm512_loadu_ps(&source[IX(DIM-2,x,1)]); + // _mm512_storeu_ps(&target[IX(0,x,1)],_mm512_loadu_ps(&source[IX(DIM-2,x,1)])); for(int y = 1; y < DIM-1; y++){ target[IX(0,x,y)] = source[IX(DIM-2,x,y)]; } @@ -750,6 +761,7 @@ void copyNeighborsRaw if(ARR_EXISTS(chunk_mask,2,1,1)){ source = GET_ARR_RAW(env,neighborArray,CK(2,1,1)); for(int x=1; x < DIM-1; x++){ + // _mm512_storeu_ps(&target[IX(DIM-1,x,1)],_mm512_loadu_ps(&source[IX(1,x,1)])); for(int y = 1; y < DIM-1; y++){ target[IX(DIM-1,x,y)] = source[IX(1,x,y)]; } diff --git a/src/main/java/electrosphere/FluidSim.java b/src/main/java/electrosphere/FluidSim.java index 50f2af2..98c5471 100644 --- a/src/main/java/electrosphere/FluidSim.java +++ b/src/main/java/electrosphere/FluidSim.java @@ -189,8 +189,8 @@ public class FluidSim { //clock time = time + (GLFW.glfwGetTime() - lastTime); i++; - if(i == 100){ - System.out.println(time / 100.0 * 1000.0); + if(i == 10){ + System.out.println(time / 10.0 * 1000.0); }