diff --git a/src/main/c/src/fluid/sim/grid2/velocity.c b/src/main/c/src/fluid/sim/grid2/velocity.c index f6274540..697ab906 100644 --- a/src/main/c/src/fluid/sim/grid2/velocity.c +++ b/src/main/c/src/fluid/sim/grid2/velocity.c @@ -181,7 +181,7 @@ LIBRARY_API void fluid_grid2_solveProjection( float residual = 1; int iteration = 0; while(iteration < FLUID_GRID2_LINEARSOLVERTIMES && (residual > FLUID_GRID2_PROJECTION_CONVERGENCE_TOLERANCE || residual < -FLUID_GRID2_PROJECTION_CONVERGENCE_TOLERANCE)){ - residual = solver_multigrid_iterate_serial(p,div,a,c); + residual = solver_multigrid_iterate_parallel(p,div,a,c); fluid_grid2_set_bounds(FLUID_GRID2_BOUND_NO_DIR,p); iteration++; } diff --git a/src/main/c/src/math/ode/multigrid.c b/src/main/c/src/math/ode/multigrid.c index 23325276..039d18b8 100644 --- a/src/main/c/src/math/ode/multigrid.c +++ b/src/main/c/src/math/ode/multigrid.c @@ -19,6 +19,7 @@ static int halfDim = ((DIM - 2) / 2) + 2; */ static int quarterDim = ((DIM - 2) / 4) + 2; static int LOWEST_DIM = ((DIM - 2) / 4) + 2; +static int LOWEST_PARALLEL_DIM = ((DIM - 2) / 1) + 2; /** * The full resolution grids @@ -57,29 +58,28 @@ void initialization_check(); void restrict_serial(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM); void prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM); +//parallelized operations +void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM); + /** * Relaxes an ODE matrix by 1 iteration of multigrid method * @param phi The phi array * @param phi0 The phi array from the last frame * @param a The a const * @param c The c const + * @param GRIDDIM The dimension of the phi grid * @return The residual */ float solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float a, float c, int GRIDDIM){ - initialization_check(); - int LOWERDIM = ((GRIDDIM - 2) / 2) + 2; float * currResidual = get_current_residual(GRIDDIM); float * lowerPhi = get_current_phi(LOWERDIM); float * lowerPhi0 = get_current_phi0(LOWERDIM); float * lowerResidual = get_current_residual(LOWERDIM); - // - //gauss-seidel at highest res + //smooth solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM); - if(GRIDDIM == DIM){ - fluid_grid2_set_bounds(0,phi); - } + //compute residuals solver_multigrid_store_residual_serial(phi,phi0,currResidual,a,c,GRIDDIM); @@ -88,11 +88,6 @@ float solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float //solve next-coarsest grid if(GRIDDIM <= LOWEST_DIM){ - //smooth - solver_gauss_seidel_iterate_parallel(quarterGridPhi,quarterGridPhi0,a,c,quarterDim); - //compute residual - solver_multigrid_store_residual_serial(quarterGridPhi,quarterGridPhi0,quarterGridResidual,a,c,quarterDim); - } else { float solution = ( phi0[solver_gauss_seidel_get_index(1,1,1,GRIDDIM)] + @@ -115,6 +110,8 @@ float solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float } } } + } else { + solver_multigrid_iterate_serial_recursive(lowerPhi,lowerPhi0,a,c,LOWERDIM); } //interpolate from the lower grid @@ -122,10 +119,6 @@ float solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float //smooth solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM); - if(GRIDDIM == DIM){ - fluid_grid2_set_bounds(0,phi); - } - return solver_multigrid_calculate_residual_norm_serial(phi,phi0,a,c,GRIDDIM); } @@ -140,10 +133,52 @@ float solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float * @return The residual */ float solver_multigrid_iterate_serial(float * phi, float * phi0, float a, float c){ + initialization_check(); return solver_multigrid_iterate_serial_recursive(phi,phi0,a,c,DIM); } +/** + * Relaxes an ODE matrix by 1 iteration of multigrid method + * @param phi The phi array + * @param phi0 The phi array from the last frame + * @param a The a const + * @param c The c const + * @param GRIDDIM The dimension of the phi grid + * @return The residual + */ +float solver_multigrid_iterate_parallel_recursive(float * phi, float * phi0, float a, float c, int GRIDDIM){ + int LOWERDIM = ((GRIDDIM - 2) / 2) + 2; + float * currResidual = get_current_residual(GRIDDIM); + float * lowerPhi = get_current_phi(LOWERDIM); + float * lowerPhi0 = get_current_phi0(LOWERDIM); + float * lowerResidual = get_current_residual(LOWERDIM); + + //smooth + solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM); + + //compute residuals + solver_multigrid_store_residual_serial(phi,phi0,currResidual,a,c,GRIDDIM); + + //restrict + restrict_parallel(currResidual,GRIDDIM,lowerPhi,lowerPhi0,LOWERDIM); + + //solve next-coarsest grid + if(GRIDDIM <= LOWEST_PARALLEL_DIM){ + solver_multigrid_iterate_serial_recursive(lowerPhi,lowerPhi0,a,c,LOWERDIM); + } else { + solver_multigrid_iterate_parallel_recursive(lowerPhi,lowerPhi0,a,c,LOWERDIM); + } + + //interpolate from the lower grid + prolongate_serial(phi,GRIDDIM,lowerPhi,LOWERDIM); + + //smooth + solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM); + + return solver_multigrid_calculate_residual_norm_serial(phi,phi0,a,c,GRIDDIM); +} + /** * Relaxes an ODE matrix by 1 iteration of multigrid method * @param phi The phi array @@ -154,167 +189,7 @@ float solver_multigrid_iterate_serial(float * phi, float * phi0, float a, float */ float solver_multigrid_iterate_parallel(float * phi, float * phi0, float a, float c){ initialization_check(); - float residual; - - // - //gauss-seidel at highest res - solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,DIM); - fluid_grid2_set_bounds(0,phi); - //compute residuals - solver_multigrid_store_residual_serial(phi,phi0,fullGridResidual,a,c,DIM); - - - //restrict - //(current operator is injection -- inject r^2 from this grid at phi0 of the smaller grid) - for(int x = 0; x < halfDim - 1; x++){ - for(int y = 0; y < halfDim - 1; y++){ - for(int z = 0; z < halfDim - 1; z++){ - halfGridPhi[solver_gauss_seidel_get_index(x,y,z,halfDim)] = 0; - halfGridPhi0[solver_gauss_seidel_get_index(x,y,z,halfDim)] = 0; - } - } - } - //populate grid - for(int x = 1; x < halfDim - 1; x++){ - for(int y = 1; y < halfDim - 1; y++){ - for(int z = 1; z < halfDim - 1; z++){ - //direct transfer operator (faster, lower accuracy) - halfGridPhi0[solver_gauss_seidel_get_index(x,y,z,halfDim)] = fullGridResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,DIM)]; - } - } - } - - - - - // - //half res - // - //smooth - solver_gauss_seidel_iterate_parallel(halfGridPhi,halfGridPhi0,a,c,halfDim); - //compute residual - solver_multigrid_store_residual_serial(halfGridPhi,halfGridPhi0,halfGridResidual,a,c,halfDim); - - - - - - - // - //quarter res - // - - //restrict - //(current operator is injection -- inject r^2 from this grid at phi0 of the smaller grid) - for(int x = 0; x < quarterDim - 1; x++){ - for(int y = 0; y < quarterDim - 1; y++){ - for(int z = 0; z < quarterDim - 1; z++){ - quarterGridPhi[solver_gauss_seidel_get_index(x,y,z,quarterDim)] = 0; - quarterGridPhi0[solver_gauss_seidel_get_index(x,y,z,quarterDim)] = 0; - } - } - } - //populate grid - for(int x = 1; x < quarterDim - 1; x++){ - for(int y = 1; y < quarterDim - 1; y++){ - for(int z = 1; z < quarterDim - 1; z++){ - //direct transfer operator (faster, lower accuracy) - quarterGridPhi0[solver_gauss_seidel_get_index(x,y,z,quarterDim)] = halfGridResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,halfDim)]; - } - } - } - - //smooth - solver_gauss_seidel_iterate_parallel(quarterGridPhi,quarterGridPhi0,a,c,quarterDim); - //compute residual - solver_multigrid_store_residual_serial(quarterGridPhi,quarterGridPhi0,quarterGridResidual,a,c,quarterDim); - - //interpolate into phi of the higher grid - for(int x = 1; x < halfDim - 1; x++){ - for(int y = 1; y < halfDim - 1; y++){ - for(int z = 1; z < halfDim - 1; z++){ - //direct transfer operator (faster, lower accuracy) - halfGridPhi[solver_gauss_seidel_get_index(x,y,z,halfDim)] = - halfGridPhi[solver_gauss_seidel_get_index(x,y,z,halfDim)] + - quarterGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+0, z/2+0 ,quarterDim)] - ; - - //interpolation operator (slower, better accuracy) - // phi[solver_gauss_seidel_get_index(x,y,z,DIM)] = - // phi[solver_gauss_seidel_get_index(x,y,z,DIM)] + - // ( - // halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+0, z/2+0 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+0, z/2+1 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+1, z/2+0 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+1, z/2+1 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+0, z/2+0 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+0, z/2+1 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+1, z/2+0 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+1, z/2+1 ,halfDim)] - // ) - // ; - } - } - } - - - - - - - // - //half res - // - - //smooth - solver_gauss_seidel_iterate_parallel(halfGridPhi,halfGridPhi0,a,c,halfDim); - //compute residual - solver_multigrid_store_residual_serial(halfGridPhi,halfGridPhi0,halfGridResidual,a,c,halfDim); - - //interpolate phi of the higher grid - for(int x = 1; x < DIM - 1; x++){ - for(int y = 1; y < DIM - 1; y++){ - for(int z = 1; z < DIM - 1; z++){ - //direct transfer operator (faster, lower accuracy) - phi[solver_gauss_seidel_get_index(x,y,z,DIM)] = - phi[solver_gauss_seidel_get_index(x,y,z,DIM)] + - halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+0, z/2+0 ,halfDim)] - ; - - //interpolation operator (slower, better accuracy) - // phi[solver_gauss_seidel_get_index(x,y,z,DIM)] = - // phi[solver_gauss_seidel_get_index(x,y,z,DIM)] + - // ( - // halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+0, z/2+0 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+0, z/2+1 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+1, z/2+0 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+1, z/2+1 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+0, z/2+0 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+0, z/2+1 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+1, z/2+0 ,halfDim)] + - // halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+1, z/2+1 ,halfDim)] - // ) - // ; - } - } - } - - - - - - - - // - // full res - // - // - //smooth - solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,DIM); - fluid_grid2_set_bounds(0,phi); - - - return solver_multigrid_calculate_residual_norm_serial(phi,phi0,a,c,DIM); + return solver_multigrid_iterate_parallel_recursive(phi,phi0,a,c,DIM); } @@ -398,6 +273,94 @@ void restrict_serial(float * currResidual, int GRIDDIM, float * lowerPhi, float } } +/** + * Serially restricts the current residual into the lower phi grid + */ +void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM){ + if(LOWERDIM < 10){ + restrict_serial(currResidual,GRIDDIM,lowerPhi,lowerPhi0,LOWERDIM); + return; + } + int x, y, z; + __m256 zeroVec = _mm256_setzero_ps(); + __m256 residuals; + __m256i offsets = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + + // + //set first plane + // + x = 0; + for(y = 0; y < LOWERDIM; y++){ + for(z = 0; z < LOWERDIM-7; z=z+8){ + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + z = LOWERDIM - 8; + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + + // + //set main volume + // + for(x = 1; x < LOWERDIM - 1; x++){ + + // + //set the first edge + // + y = 0; + for(z = 0; z < LOWERDIM-7; z=z+8){ + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + z = LOWERDIM - 8; + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + + + // + //copy the main contents + // + for(y = 1; y < LOWERDIM - 1; y++){ + lowerPhi[solver_gauss_seidel_get_index(x,y,0,LOWERDIM)] = 0; + for(z = 1; z < LOWERDIM-7; z=z+8){ + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + residuals = _mm256_i32gather_ps(&currResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,GRIDDIM)],offsets,2); + _mm256_storeu_ps(&lowerPhi0[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],residuals); + } + lowerPhi[solver_gauss_seidel_get_index(x,y,LOWERDIM - 1,LOWERDIM)] = 0; + } + + // + //set the last edge + // + for(z = 0; z < LOWERDIM-7; z=z+8){ + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + z = LOWERDIM - 8; + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + + // + //set last plane + // + x = LOWERDIM - 1; + for(y = 0; y < LOWERDIM; y++){ + for(z = 0; z < LOWERDIM-7; z=z+8){ + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + //zero out the end + z = LOWERDIM - 8; + _mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec); + } + + //populate grid + for(x = 1; x < LOWERDIM - 1; x++){ + for(y = 1; y < LOWERDIM - 1; y++){ + for(z = 1; z < LOWERDIM - 1; z++){ + //direct transfer operator (faster, lower accuracy) + lowerPhi0[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)] = currResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,GRIDDIM)]; + } + } + } +} + /** * Prolongates a lower grid into a higher grid */