gcc optimization
All checks were successful
studiorailgun/fluid-sim/pipeline/head This commit looks good

This commit is contained in:
unknown 2024-03-10 19:05:24 -04:00
parent 6f052e48e6
commit 1230ac51cb
3 changed files with 18 additions and 6 deletions

View File

@ -41,22 +41,22 @@ rm -f ./*.dll
#compile object files #compile object files
COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2 -Ofast" COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2 -march=native -Ofast -msse -msse2 -msse3 -mmmx -m3dnow"
INPUT_FILES="./densitystep.c" INPUT_FILES="./densitystep.c"
OUTPUT_FILE="./densitystep.o" OUTPUT_FILE="./densitystep.o"
gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE
COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2 -Ofast" COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2 -march=native -Ofast -msse -msse2 -msse3 -mmmx -m3dnow"
INPUT_FILES="./velocitystep.c" INPUT_FILES="./velocitystep.c"
OUTPUT_FILE="./velocitystep.o" OUTPUT_FILE="./velocitystep.o"
gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE
COMPILE_FLAGS="-c -fPIC -m64 -Ofast" COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2 -march=native -Ofast -msse -msse2 -msse3 -mmmx -m3dnow"
INPUT_FILES="./chunkmask.c" INPUT_FILES="./chunkmask.c"
OUTPUT_FILE="./chunkmask.o" OUTPUT_FILE="./chunkmask.o"
gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE
COMPILE_FLAGS="-c -fPIC -m64 -Ofast" COMPILE_FLAGS="-c -fPIC -m64 -mavx -mavx2 -march=native -Ofast -msse -msse2 -msse3 -mmmx -m3dnow"
INPUT_FILES="./fluidsim.c" INPUT_FILES="./fluidsim.c"
OUTPUT_FILE="./fluidsim.o" OUTPUT_FILE="./fluidsim.o"
gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE gcc $COMPILE_FLAGS -I"$BASE_INCLUDE_DIR" -I"$OS_INCLUDE_DIR" $INPUT_FILES -o $OUTPUT_FILE

View File

@ -738,9 +738,20 @@ void copyNeighborsRaw
// PLANES // PLANES
// //
// //
// __m512 transferVector;// = _mm512_set1_ps(0.5*N);
//__m256 vector = _mm256_loadu_ps(&p[IX(i-1,j,k)]);
//vector = _mm256_add_ps(vector,_mm256_loadu_ps(&p[IX(i+1,j,k)]));
//vector = _mm256_add_ps(vector,_mm256_loadu_ps(&p[IX(i,j-1,k)]));
//_mm256_storeu_ps(&p[IX(i,j,k)],vector);
//__m256
//_mm256_loadu_ps
//_mm256_storeu_ps
if(ARR_EXISTS(chunk_mask,0,1,1)){ if(ARR_EXISTS(chunk_mask,0,1,1)){
source = GET_ARR_RAW(env,neighborArray,CK(0,1,1)); source = GET_ARR_RAW(env,neighborArray,CK(0,1,1));
for(int x=1; x < DIM-1; x++){ for(int x=1; x < DIM-1; x++){
// transferVector = _mm512_loadu_ps(&source[IX(DIM-2,x,1)]);
// _mm512_storeu_ps(&target[IX(0,x,1)],_mm512_loadu_ps(&source[IX(DIM-2,x,1)]));
for(int y = 1; y < DIM-1; y++){ for(int y = 1; y < DIM-1; y++){
target[IX(0,x,y)] = source[IX(DIM-2,x,y)]; target[IX(0,x,y)] = source[IX(DIM-2,x,y)];
} }
@ -750,6 +761,7 @@ void copyNeighborsRaw
if(ARR_EXISTS(chunk_mask,2,1,1)){ if(ARR_EXISTS(chunk_mask,2,1,1)){
source = GET_ARR_RAW(env,neighborArray,CK(2,1,1)); source = GET_ARR_RAW(env,neighborArray,CK(2,1,1));
for(int x=1; x < DIM-1; x++){ for(int x=1; x < DIM-1; x++){
// _mm512_storeu_ps(&target[IX(DIM-1,x,1)],_mm512_loadu_ps(&source[IX(1,x,1)]));
for(int y = 1; y < DIM-1; y++){ for(int y = 1; y < DIM-1; y++){
target[IX(DIM-1,x,y)] = source[IX(1,x,y)]; target[IX(DIM-1,x,y)] = source[IX(1,x,y)];
} }

View File

@ -189,8 +189,8 @@ public class FluidSim {
//clock //clock
time = time + (GLFW.glfwGetTime() - lastTime); time = time + (GLFW.glfwGetTime() - lastTime);
i++; i++;
if(i == 100){ if(i == 10){
System.out.println(time / 100.0 * 1000.0); System.out.println(time / 10.0 * 1000.0);
} }