diff --git a/src/main/c/includes/math/ode/multigrid.h b/src/main/c/includes/math/ode/multigrid.h index 9e808778..08c2c6cc 100644 --- a/src/main/c/includes/math/ode/multigrid.h +++ b/src/main/c/includes/math/ode/multigrid.h @@ -27,6 +27,58 @@ float solver_multigrid_iterate_serial(float * phi, float * phi0, float a, float */ float solver_multigrid_iterate_parallel(float * phi, float * phi0, float a, float c); +/** + * Relaxes an ODE matrix by 1 iteration of multigrid method + * @param phi The phi array + * @param phi0 The phi array from the last frame + * @param a The a const + * @param c The c const + * @param GRIDDIM The dimension of the phi grid + * @return The residual + */ +void solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float a, float c, int GRIDDIM); + +/** + * Verifies that all grids are allocated + */ +void solver_multigrid_initialization_check(); + +/** + * Calculates the residual of the grid + * @return Returns the residual norm of the grid + */ +float solver_multigrid_calculate_residual_norm_serial(float * phi, float * phi0, float a, float c, int GRIDDIM); + +/** + * Serially restricts the current residual into the lower phi grid + */ +void solver_multigrid_restrict_serial(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM); + +/** + * Prolongates a lower grid into a higher grid + */ +void solver_multigrid_prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM); + +/** + * Gets the phi to use for the current level of the multigrid solver + */ +float * solver_multigrid_get_current_phi(int dim); + +/** + * Gets the phi0 to use for the current level of the multigrid solver + */ +float * solver_multigrid_get_current_phi0(int dim); + +/** + * Gets the residual to use for the current level of the multigrid solver + */ +float * solver_multigrid_get_current_residual(int dim); + +/** + * Calculates the residual of the grid + */ +void solver_multigrid_store_residual_serial(float * phi, float * phi0, float * residualGrid, float a, float c, int GRIDDIM); + #endif \ No newline at end of file diff --git a/src/main/c/includes/math/ode/multigrid_parallel.h b/src/main/c/includes/math/ode/multigrid_parallel.h new file mode 100644 index 00000000..8607ffaf --- /dev/null +++ b/src/main/c/includes/math/ode/multigrid_parallel.h @@ -0,0 +1,294 @@ +#ifndef MATH_ODE_MULTIGRID_PARALLEL_H +#define MATH_ODE_MULTIGRID_PARALLEL_H + +#include "math/ode/gauss_seidel.h" +#include "math/ode/multigrid.h" + +#define MULTIGRID_PARALLEL_LOWEST_PARALLEL_DIM ((DIM - 2) / 1) + 2 + + + + + + + +/** + * Serially restricts the current residual into the lower phi grid + */ +static inline void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM){ + if(LOWERDIM < 10){ + solver_multigrid_restrict_serial(currResidual,GRIDDIM,lowerPhi,lowerPhi0,LOWERDIM); + return; + } + int x, y, z; + __m256 zeroVec = _mm256_setzero_ps(); + __m256 residuals; + __m256i offsets = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + + // + //set first plane + // + x = 0; + for(y = 0; y < LOWERDIM; y++){ + for(z = 0; z < LOWERDIM-7; z=z+8){ + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + z = LOWERDIM - 8; + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + + // + //set main volume + // + for(x = 1; x < LOWERDIM - 1; x++){ + + // + //set the first edge + // + y = 0; + for(z = 0; z < LOWERDIM-7; z=z+8){ + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + z = LOWERDIM - 8; + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + + + // + //copy the main contents + // + for(y = 1; y < LOWERDIM - 1; y++){ + lowerPhi[solver_gauss_seidel_get_index(x,y,0,LOWERDIM)] = 0; + for(z = 1; z < LOWERDIM-7; z=z+8){ + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + residuals = _mm256_i32gather_ps(&currResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,GRIDDIM)],offsets,sizeof(float)); + _mm256_storeu_ps(&lowerPhi0[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],residuals); + } + lowerPhi[solver_gauss_seidel_get_index(x,y,LOWERDIM - 1,LOWERDIM)] = 0; + } + + // + //set the last edge + // + for(z = 0; z < LOWERDIM-7; z=z+8){ + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + z = LOWERDIM - 8; + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + + // + //set last plane + // + x = LOWERDIM - 1; + for(y = 0; y < LOWERDIM; y++){ + for(z = 0; z < LOWERDIM-7; z=z+8){ + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + //zero out the end + z = LOWERDIM - 8; + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + + //populate grid + for(x = 1; x < LOWERDIM - 1; x++){ + for(y = 1; y < LOWERDIM - 1; y++){ + for(z = 1; z < LOWERDIM - 1; z++){ + //direct transfer operator (faster, lower accuracy) + lowerPhi0[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)] = currResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,GRIDDIM)]; + } + } + } +} + + + + + +/** + * Calculates the residual of the grid + */ +static inline float solver_multigrid_calculate_residual_parallel(float * phi, float * phi0, float a, float c, int GRIDDIM){ + //calculate residual + __m256 aScalar = _mm256_set1_ps(a); + __m256 cScalar = _mm256_set1_ps(c); + __m256 collector = _mm256_setzero_ps(); + __m256 vector; + float vec_sum_storage[8]; + float residual = 0; + //transform u direction + int i, j, k; + int increment = 0; + for(k=1; k GRIDDIM*GRIDDIM*GRIDDIM){ + printf("INCREMENT FAILURE %d %d %d %d \n",i,j,k,increment); + return -1; + } + } + } + } + _mm256_storeu_ps(vec_sum_storage,collector); + residual = + vec_sum_storage[0] + vec_sum_storage[1] + + vec_sum_storage[2] + vec_sum_storage[3] + + vec_sum_storage[4] + vec_sum_storage[5] + + vec_sum_storage[6] + vec_sum_storage[7] + ; + return residual; +} + +/** + * Prolongates a lower grid into a higher grid + */ +static inline void prolongate_parallel(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM){ + if(LOWERDIM < 10){ + solver_multigrid_prolongate_serial(phi,GRIDDIM,lowerPhi,LOWERDIM); + return; + } + __m256i offsets = _mm256_set_epi32(0, 0, 1, 1, 2, 2, 3, 3); + __m256 lowerPhiVec; + __m256 phiVec; + int x, y, z; + for(int x = 1; x < GRIDDIM - 1; x++){ + for(int y = 1; y < GRIDDIM - 1; y++){ + for(int z = 1; z < GRIDDIM - 1; z=z+8){ + phiVec = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(x,y,z,GRIDDIM)]); + lowerPhiVec = _mm256_i32gather_ps(&lowerPhi[solver_gauss_seidel_get_index(x/2,y/2,z/2,LOWERDIM)],offsets,sizeof(float)); + _mm256_storeu_ps( + &phi[solver_gauss_seidel_get_index(x,y,z,GRIDDIM)], + _mm256_add_ps( + phiVec, + lowerPhiVec + ) + ); + } + } + } +} + + + + + +/** + * Calculates the residual of the grid + */ +static inline void solver_multigrid_parallel_store_residual(float * phi, float * phi0, float * residualGrid, float a, float c, int GRIDDIM){ + if(GRIDDIM < 10){ + solver_multigrid_store_residual_serial(phi,phi0,residualGrid,a,c,GRIDDIM); + return; + } + __m256 laplacian; + __m256 constVec = _mm256_set1_ps(6); + //calculate residual + int i, j, k; + for(k=1; k FLUID_GRID2_PROJECTION_CONVERGENCE_TOLERANCE || residual < -FLUID_GRID2_PROJECTION_CONVERGENCE_TOLERANCE)){ - residual = solver_multigrid_iterate_parallel(p,div,a,c); + residual = solver_multigrid_parallel_iterate(p,div,a,c); fluid_grid2_set_bounds(FLUID_GRID2_BOUND_NO_DIR,p); iteration++; } diff --git a/src/main/c/src/math/ode/multigrid.c b/src/main/c/src/math/ode/multigrid.c index a6476980..e851f87e 100644 --- a/src/main/c/src/math/ode/multigrid.c +++ b/src/main/c/src/math/ode/multigrid.c @@ -51,12 +51,6 @@ static float * eighthGridResidual = NULL; float solver_multigrid_calculate_residual_norm_serial(float * phi, float * phi0, float a, float c, int GRIDDIM); float solver_multigrid_calculate_residual_parallel(float * phi, float * phi0, float a, float c, int GRIDDIM); void solver_multigrid_store_residual_serial(float * phi, float * phi0, float * residualGrid, float a, float c, int GRIDDIM); -float * get_current_phi(int dim); -float * get_current_phi0(int dim); -float * get_current_residual(int dim); -void initialization_check(); -void restrict_serial(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM); -void prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM); //parallelized operations void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM); @@ -74,10 +68,10 @@ void solver_multigrid_store_residual_parallel(float * phi, float * phi0, float * */ void solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float a, float c, int GRIDDIM){ int LOWERDIM = ((GRIDDIM - 2) / 2) + 2; - float * currResidual = get_current_residual(GRIDDIM); - float * lowerPhi = get_current_phi(LOWERDIM); - float * lowerPhi0 = get_current_phi0(LOWERDIM); - float * lowerResidual = get_current_residual(LOWERDIM); + float * currResidual = solver_multigrid_get_current_residual(GRIDDIM); + float * lowerPhi = solver_multigrid_get_current_phi(LOWERDIM); + float * lowerPhi0 = solver_multigrid_get_current_phi0(LOWERDIM); + float * lowerResidual = solver_multigrid_get_current_residual(LOWERDIM); //smooth solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM); @@ -133,7 +127,7 @@ void solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float * @return The residual */ float solver_multigrid_iterate_serial(float * phi, float * phi0, float a, float c){ - initialization_check(); + solver_multigrid_initialization_check(); solver_multigrid_iterate_serial_recursive(phi,phi0,a,c,DIM); return solver_multigrid_calculate_residual_norm_serial(phi,phi0,a,c,DIM); } @@ -150,10 +144,10 @@ float solver_multigrid_iterate_serial(float * phi, float * phi0, float a, float */ void solver_multigrid_iterate_parallel_recursive(float * phi, float * phi0, float a, float c, int GRIDDIM){ int LOWERDIM = ((GRIDDIM - 2) / 2) + 2; - float * currResidual = get_current_residual(GRIDDIM); - float * lowerPhi = get_current_phi(LOWERDIM); - float * lowerPhi0 = get_current_phi0(LOWERDIM); - float * lowerResidual = get_current_residual(LOWERDIM); + float * currResidual = solver_multigrid_get_current_residual(GRIDDIM); + float * lowerPhi = solver_multigrid_get_current_phi(LOWERDIM); + float * lowerPhi0 = solver_multigrid_get_current_phi0(LOWERDIM); + float * lowerResidual = solver_multigrid_get_current_residual(LOWERDIM); //smooth solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM); @@ -187,7 +181,7 @@ void solver_multigrid_iterate_parallel_recursive(float * phi, float * phi0, floa * @return The residual */ float solver_multigrid_iterate_parallel(float * phi, float * phi0, float a, float c){ - initialization_check(); + solver_multigrid_initialization_check(); solver_multigrid_iterate_parallel_recursive(phi,phi0,a,c,DIM); return solver_multigrid_calculate_residual_norm_serial(phi,phi0,a,c,DIM); } @@ -197,7 +191,7 @@ float solver_multigrid_iterate_parallel(float * phi, float * phi0, float a, floa /** * Gets the phi to use for the current level of the multigrid solver */ -float * get_current_phi(int dim){ +float * solver_multigrid_get_current_phi(int dim){ if(dim == ((DIM-2)/2) + 2){ return halfGridPhi; } else if(dim == ((DIM-2)/4) + 2){ @@ -214,7 +208,7 @@ float * get_current_phi(int dim){ /** * Gets the phi0 to use for the current level of the multigrid solver */ -float * get_current_phi0(int dim){ +float * solver_multigrid_get_current_phi0(int dim){ if(dim == ((DIM-2)/2) + 2){ return halfGridPhi0; } else if(dim == ((DIM-2)/4) + 2){ @@ -231,7 +225,7 @@ float * get_current_phi0(int dim){ /** * Gets the residual to use for the current level of the multigrid solver */ -float * get_current_residual(int dim){ +float * solver_multigrid_get_current_residual(int dim){ if(dim == 18){ return fullGridResidual; } else if(dim == ((DIM-2)/2) + 2){ @@ -250,7 +244,7 @@ float * get_current_residual(int dim){ /** * Serially restricts the current residual into the lower phi grid */ -void restrict_serial(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM){ +void solver_multigrid_restrict_serial(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM){ int x, y, z; //restrict //(current operator is injection -- inject r^2 from this grid at phi0 of the smaller grid) @@ -278,7 +272,7 @@ void restrict_serial(float * currResidual, int GRIDDIM, float * lowerPhi, float */ void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM){ if(LOWERDIM < 10){ - restrict_serial(currResidual,GRIDDIM,lowerPhi,lowerPhi0,LOWERDIM); + solver_multigrid_restrict_serial(currResidual,GRIDDIM,lowerPhi,lowerPhi0,LOWERDIM); return; } int x, y, z; @@ -364,7 +358,7 @@ void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, floa /** * Prolongates a lower grid into a higher grid */ -void prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM){ +void solver_multigrid_prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM){ int x, y, z; for(int x = 1; x < GRIDDIM - 1; x++){ for(int y = 1; y < GRIDDIM - 1; y++){ @@ -384,7 +378,7 @@ void prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM) */ void prolongate_parallel(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM){ if(LOWERDIM < 10){ - prolongate_serial(phi,GRIDDIM,lowerPhi,LOWERDIM); + solver_multigrid_prolongate_serial(phi,GRIDDIM,lowerPhi,LOWERDIM); return; } __m256i offsets = _mm256_set_epi32(0, 0, 1, 1, 2, 2, 3, 3); @@ -411,7 +405,7 @@ void prolongate_parallel(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDI /** * Verifies that all grids are allocated */ -void initialization_check(){ +void solver_multigrid_initialization_check(){ // full res if(fullGridResidual == NULL){ fullGridResidual = (float *)calloc(1,DIM * DIM * DIM * sizeof(float));