fix non-recursion bug + work on parallel
Some checks failed
studiorailgun/Renderer/pipeline/head There was a failure building this commit
Some checks failed
studiorailgun/Renderer/pipeline/head There was a failure building this commit
This commit is contained in:
parent
81e083f6c2
commit
66f7080890
@ -181,7 +181,7 @@ LIBRARY_API void fluid_grid2_solveProjection(
|
|||||||
float residual = 1;
|
float residual = 1;
|
||||||
int iteration = 0;
|
int iteration = 0;
|
||||||
while(iteration < FLUID_GRID2_LINEARSOLVERTIMES && (residual > FLUID_GRID2_PROJECTION_CONVERGENCE_TOLERANCE || residual < -FLUID_GRID2_PROJECTION_CONVERGENCE_TOLERANCE)){
|
while(iteration < FLUID_GRID2_LINEARSOLVERTIMES && (residual > FLUID_GRID2_PROJECTION_CONVERGENCE_TOLERANCE || residual < -FLUID_GRID2_PROJECTION_CONVERGENCE_TOLERANCE)){
|
||||||
residual = solver_multigrid_iterate_serial(p,div,a,c);
|
residual = solver_multigrid_iterate_parallel(p,div,a,c);
|
||||||
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_NO_DIR,p);
|
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_NO_DIR,p);
|
||||||
iteration++;
|
iteration++;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -19,6 +19,7 @@ static int halfDim = ((DIM - 2) / 2) + 2;
|
|||||||
*/
|
*/
|
||||||
static int quarterDim = ((DIM - 2) / 4) + 2;
|
static int quarterDim = ((DIM - 2) / 4) + 2;
|
||||||
static int LOWEST_DIM = ((DIM - 2) / 4) + 2;
|
static int LOWEST_DIM = ((DIM - 2) / 4) + 2;
|
||||||
|
static int LOWEST_PARALLEL_DIM = ((DIM - 2) / 1) + 2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The full resolution grids
|
* The full resolution grids
|
||||||
@ -57,29 +58,28 @@ void initialization_check();
|
|||||||
void restrict_serial(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM);
|
void restrict_serial(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM);
|
||||||
void prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM);
|
void prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM);
|
||||||
|
|
||||||
|
//parallelized operations
|
||||||
|
void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Relaxes an ODE matrix by 1 iteration of multigrid method
|
* Relaxes an ODE matrix by 1 iteration of multigrid method
|
||||||
* @param phi The phi array
|
* @param phi The phi array
|
||||||
* @param phi0 The phi array from the last frame
|
* @param phi0 The phi array from the last frame
|
||||||
* @param a The a const
|
* @param a The a const
|
||||||
* @param c The c const
|
* @param c The c const
|
||||||
|
* @param GRIDDIM The dimension of the phi grid
|
||||||
* @return The residual
|
* @return The residual
|
||||||
*/
|
*/
|
||||||
float solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float a, float c, int GRIDDIM){
|
float solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float a, float c, int GRIDDIM){
|
||||||
initialization_check();
|
|
||||||
|
|
||||||
int LOWERDIM = ((GRIDDIM - 2) / 2) + 2;
|
int LOWERDIM = ((GRIDDIM - 2) / 2) + 2;
|
||||||
float * currResidual = get_current_residual(GRIDDIM);
|
float * currResidual = get_current_residual(GRIDDIM);
|
||||||
float * lowerPhi = get_current_phi(LOWERDIM);
|
float * lowerPhi = get_current_phi(LOWERDIM);
|
||||||
float * lowerPhi0 = get_current_phi0(LOWERDIM);
|
float * lowerPhi0 = get_current_phi0(LOWERDIM);
|
||||||
float * lowerResidual = get_current_residual(LOWERDIM);
|
float * lowerResidual = get_current_residual(LOWERDIM);
|
||||||
|
|
||||||
//
|
//smooth
|
||||||
//gauss-seidel at highest res
|
|
||||||
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM);
|
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM);
|
||||||
if(GRIDDIM == DIM){
|
|
||||||
fluid_grid2_set_bounds(0,phi);
|
|
||||||
}
|
|
||||||
//compute residuals
|
//compute residuals
|
||||||
solver_multigrid_store_residual_serial(phi,phi0,currResidual,a,c,GRIDDIM);
|
solver_multigrid_store_residual_serial(phi,phi0,currResidual,a,c,GRIDDIM);
|
||||||
|
|
||||||
@ -88,11 +88,6 @@ float solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float
|
|||||||
|
|
||||||
//solve next-coarsest grid
|
//solve next-coarsest grid
|
||||||
if(GRIDDIM <= LOWEST_DIM){
|
if(GRIDDIM <= LOWEST_DIM){
|
||||||
//smooth
|
|
||||||
solver_gauss_seidel_iterate_parallel(quarterGridPhi,quarterGridPhi0,a,c,quarterDim);
|
|
||||||
//compute residual
|
|
||||||
solver_multigrid_store_residual_serial(quarterGridPhi,quarterGridPhi0,quarterGridResidual,a,c,quarterDim);
|
|
||||||
} else {
|
|
||||||
float solution =
|
float solution =
|
||||||
(
|
(
|
||||||
phi0[solver_gauss_seidel_get_index(1,1,1,GRIDDIM)] +
|
phi0[solver_gauss_seidel_get_index(1,1,1,GRIDDIM)] +
|
||||||
@ -115,6 +110,8 @@ float solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
solver_multigrid_iterate_serial_recursive(lowerPhi,lowerPhi0,a,c,LOWERDIM);
|
||||||
}
|
}
|
||||||
|
|
||||||
//interpolate from the lower grid
|
//interpolate from the lower grid
|
||||||
@ -122,10 +119,6 @@ float solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float
|
|||||||
|
|
||||||
//smooth
|
//smooth
|
||||||
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM);
|
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM);
|
||||||
if(GRIDDIM == DIM){
|
|
||||||
fluid_grid2_set_bounds(0,phi);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
return solver_multigrid_calculate_residual_norm_serial(phi,phi0,a,c,GRIDDIM);
|
return solver_multigrid_calculate_residual_norm_serial(phi,phi0,a,c,GRIDDIM);
|
||||||
}
|
}
|
||||||
@ -140,10 +133,52 @@ float solver_multigrid_iterate_serial_recursive(float * phi, float * phi0, float
|
|||||||
* @return The residual
|
* @return The residual
|
||||||
*/
|
*/
|
||||||
float solver_multigrid_iterate_serial(float * phi, float * phi0, float a, float c){
|
float solver_multigrid_iterate_serial(float * phi, float * phi0, float a, float c){
|
||||||
|
initialization_check();
|
||||||
return solver_multigrid_iterate_serial_recursive(phi,phi0,a,c,DIM);
|
return solver_multigrid_iterate_serial_recursive(phi,phi0,a,c,DIM);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Relaxes an ODE matrix by 1 iteration of multigrid method
|
||||||
|
* @param phi The phi array
|
||||||
|
* @param phi0 The phi array from the last frame
|
||||||
|
* @param a The a const
|
||||||
|
* @param c The c const
|
||||||
|
* @param GRIDDIM The dimension of the phi grid
|
||||||
|
* @return The residual
|
||||||
|
*/
|
||||||
|
float solver_multigrid_iterate_parallel_recursive(float * phi, float * phi0, float a, float c, int GRIDDIM){
|
||||||
|
int LOWERDIM = ((GRIDDIM - 2) / 2) + 2;
|
||||||
|
float * currResidual = get_current_residual(GRIDDIM);
|
||||||
|
float * lowerPhi = get_current_phi(LOWERDIM);
|
||||||
|
float * lowerPhi0 = get_current_phi0(LOWERDIM);
|
||||||
|
float * lowerResidual = get_current_residual(LOWERDIM);
|
||||||
|
|
||||||
|
//smooth
|
||||||
|
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM);
|
||||||
|
|
||||||
|
//compute residuals
|
||||||
|
solver_multigrid_store_residual_serial(phi,phi0,currResidual,a,c,GRIDDIM);
|
||||||
|
|
||||||
|
//restrict
|
||||||
|
restrict_parallel(currResidual,GRIDDIM,lowerPhi,lowerPhi0,LOWERDIM);
|
||||||
|
|
||||||
|
//solve next-coarsest grid
|
||||||
|
if(GRIDDIM <= LOWEST_PARALLEL_DIM){
|
||||||
|
solver_multigrid_iterate_serial_recursive(lowerPhi,lowerPhi0,a,c,LOWERDIM);
|
||||||
|
} else {
|
||||||
|
solver_multigrid_iterate_parallel_recursive(lowerPhi,lowerPhi0,a,c,LOWERDIM);
|
||||||
|
}
|
||||||
|
|
||||||
|
//interpolate from the lower grid
|
||||||
|
prolongate_serial(phi,GRIDDIM,lowerPhi,LOWERDIM);
|
||||||
|
|
||||||
|
//smooth
|
||||||
|
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM);
|
||||||
|
|
||||||
|
return solver_multigrid_calculate_residual_norm_serial(phi,phi0,a,c,GRIDDIM);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Relaxes an ODE matrix by 1 iteration of multigrid method
|
* Relaxes an ODE matrix by 1 iteration of multigrid method
|
||||||
* @param phi The phi array
|
* @param phi The phi array
|
||||||
@ -154,167 +189,7 @@ float solver_multigrid_iterate_serial(float * phi, float * phi0, float a, float
|
|||||||
*/
|
*/
|
||||||
float solver_multigrid_iterate_parallel(float * phi, float * phi0, float a, float c){
|
float solver_multigrid_iterate_parallel(float * phi, float * phi0, float a, float c){
|
||||||
initialization_check();
|
initialization_check();
|
||||||
float residual;
|
return solver_multigrid_iterate_parallel_recursive(phi,phi0,a,c,DIM);
|
||||||
|
|
||||||
//
|
|
||||||
//gauss-seidel at highest res
|
|
||||||
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,DIM);
|
|
||||||
fluid_grid2_set_bounds(0,phi);
|
|
||||||
//compute residuals
|
|
||||||
solver_multigrid_store_residual_serial(phi,phi0,fullGridResidual,a,c,DIM);
|
|
||||||
|
|
||||||
|
|
||||||
//restrict
|
|
||||||
//(current operator is injection -- inject r^2 from this grid at phi0 of the smaller grid)
|
|
||||||
for(int x = 0; x < halfDim - 1; x++){
|
|
||||||
for(int y = 0; y < halfDim - 1; y++){
|
|
||||||
for(int z = 0; z < halfDim - 1; z++){
|
|
||||||
halfGridPhi[solver_gauss_seidel_get_index(x,y,z,halfDim)] = 0;
|
|
||||||
halfGridPhi0[solver_gauss_seidel_get_index(x,y,z,halfDim)] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//populate grid
|
|
||||||
for(int x = 1; x < halfDim - 1; x++){
|
|
||||||
for(int y = 1; y < halfDim - 1; y++){
|
|
||||||
for(int z = 1; z < halfDim - 1; z++){
|
|
||||||
//direct transfer operator (faster, lower accuracy)
|
|
||||||
halfGridPhi0[solver_gauss_seidel_get_index(x,y,z,halfDim)] = fullGridResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,DIM)];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
//half res
|
|
||||||
//
|
|
||||||
//smooth
|
|
||||||
solver_gauss_seidel_iterate_parallel(halfGridPhi,halfGridPhi0,a,c,halfDim);
|
|
||||||
//compute residual
|
|
||||||
solver_multigrid_store_residual_serial(halfGridPhi,halfGridPhi0,halfGridResidual,a,c,halfDim);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
//quarter res
|
|
||||||
//
|
|
||||||
|
|
||||||
//restrict
|
|
||||||
//(current operator is injection -- inject r^2 from this grid at phi0 of the smaller grid)
|
|
||||||
for(int x = 0; x < quarterDim - 1; x++){
|
|
||||||
for(int y = 0; y < quarterDim - 1; y++){
|
|
||||||
for(int z = 0; z < quarterDim - 1; z++){
|
|
||||||
quarterGridPhi[solver_gauss_seidel_get_index(x,y,z,quarterDim)] = 0;
|
|
||||||
quarterGridPhi0[solver_gauss_seidel_get_index(x,y,z,quarterDim)] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//populate grid
|
|
||||||
for(int x = 1; x < quarterDim - 1; x++){
|
|
||||||
for(int y = 1; y < quarterDim - 1; y++){
|
|
||||||
for(int z = 1; z < quarterDim - 1; z++){
|
|
||||||
//direct transfer operator (faster, lower accuracy)
|
|
||||||
quarterGridPhi0[solver_gauss_seidel_get_index(x,y,z,quarterDim)] = halfGridResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,halfDim)];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//smooth
|
|
||||||
solver_gauss_seidel_iterate_parallel(quarterGridPhi,quarterGridPhi0,a,c,quarterDim);
|
|
||||||
//compute residual
|
|
||||||
solver_multigrid_store_residual_serial(quarterGridPhi,quarterGridPhi0,quarterGridResidual,a,c,quarterDim);
|
|
||||||
|
|
||||||
//interpolate into phi of the higher grid
|
|
||||||
for(int x = 1; x < halfDim - 1; x++){
|
|
||||||
for(int y = 1; y < halfDim - 1; y++){
|
|
||||||
for(int z = 1; z < halfDim - 1; z++){
|
|
||||||
//direct transfer operator (faster, lower accuracy)
|
|
||||||
halfGridPhi[solver_gauss_seidel_get_index(x,y,z,halfDim)] =
|
|
||||||
halfGridPhi[solver_gauss_seidel_get_index(x,y,z,halfDim)] +
|
|
||||||
quarterGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+0, z/2+0 ,quarterDim)]
|
|
||||||
;
|
|
||||||
|
|
||||||
//interpolation operator (slower, better accuracy)
|
|
||||||
// phi[solver_gauss_seidel_get_index(x,y,z,DIM)] =
|
|
||||||
// phi[solver_gauss_seidel_get_index(x,y,z,DIM)] +
|
|
||||||
// (
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+0, z/2+0 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+0, z/2+1 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+1, z/2+0 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+1, z/2+1 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+0, z/2+0 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+0, z/2+1 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+1, z/2+0 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+1, z/2+1 ,halfDim)]
|
|
||||||
// )
|
|
||||||
// ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
//half res
|
|
||||||
//
|
|
||||||
|
|
||||||
//smooth
|
|
||||||
solver_gauss_seidel_iterate_parallel(halfGridPhi,halfGridPhi0,a,c,halfDim);
|
|
||||||
//compute residual
|
|
||||||
solver_multigrid_store_residual_serial(halfGridPhi,halfGridPhi0,halfGridResidual,a,c,halfDim);
|
|
||||||
|
|
||||||
//interpolate phi of the higher grid
|
|
||||||
for(int x = 1; x < DIM - 1; x++){
|
|
||||||
for(int y = 1; y < DIM - 1; y++){
|
|
||||||
for(int z = 1; z < DIM - 1; z++){
|
|
||||||
//direct transfer operator (faster, lower accuracy)
|
|
||||||
phi[solver_gauss_seidel_get_index(x,y,z,DIM)] =
|
|
||||||
phi[solver_gauss_seidel_get_index(x,y,z,DIM)] +
|
|
||||||
halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+0, z/2+0 ,halfDim)]
|
|
||||||
;
|
|
||||||
|
|
||||||
//interpolation operator (slower, better accuracy)
|
|
||||||
// phi[solver_gauss_seidel_get_index(x,y,z,DIM)] =
|
|
||||||
// phi[solver_gauss_seidel_get_index(x,y,z,DIM)] +
|
|
||||||
// (
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+0, z/2+0 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+0, z/2+1 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+1, z/2+0 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+0, y/2+1, z/2+1 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+0, z/2+0 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+0, z/2+1 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+1, z/2+0 ,halfDim)] +
|
|
||||||
// halfGridPhi[solver_gauss_seidel_get_index( x/2+1, y/2+1, z/2+1 ,halfDim)]
|
|
||||||
// )
|
|
||||||
// ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// full res
|
|
||||||
//
|
|
||||||
//
|
|
||||||
//smooth
|
|
||||||
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,DIM);
|
|
||||||
fluid_grid2_set_bounds(0,phi);
|
|
||||||
|
|
||||||
|
|
||||||
return solver_multigrid_calculate_residual_norm_serial(phi,phi0,a,c,DIM);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -398,6 +273,94 @@ void restrict_serial(float * currResidual, int GRIDDIM, float * lowerPhi, float
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Serially restricts the current residual into the lower phi grid
|
||||||
|
*/
|
||||||
|
void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM){
|
||||||
|
if(LOWERDIM < 10){
|
||||||
|
restrict_serial(currResidual,GRIDDIM,lowerPhi,lowerPhi0,LOWERDIM);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int x, y, z;
|
||||||
|
__m256 zeroVec = _mm256_setzero_ps();
|
||||||
|
__m256 residuals;
|
||||||
|
__m256i offsets = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
||||||
|
|
||||||
|
//
|
||||||
|
//set first plane
|
||||||
|
//
|
||||||
|
x = 0;
|
||||||
|
for(y = 0; y < LOWERDIM; y++){
|
||||||
|
for(z = 0; z < LOWERDIM-7; z=z+8){
|
||||||
|
_mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec);
|
||||||
|
}
|
||||||
|
z = LOWERDIM - 8;
|
||||||
|
_mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
//set main volume
|
||||||
|
//
|
||||||
|
for(x = 1; x < LOWERDIM - 1; x++){
|
||||||
|
|
||||||
|
//
|
||||||
|
//set the first edge
|
||||||
|
//
|
||||||
|
y = 0;
|
||||||
|
for(z = 0; z < LOWERDIM-7; z=z+8){
|
||||||
|
_mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec);
|
||||||
|
}
|
||||||
|
z = LOWERDIM - 8;
|
||||||
|
_mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec);
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
//copy the main contents
|
||||||
|
//
|
||||||
|
for(y = 1; y < LOWERDIM - 1; y++){
|
||||||
|
lowerPhi[solver_gauss_seidel_get_index(x,y,0,LOWERDIM)] = 0;
|
||||||
|
for(z = 1; z < LOWERDIM-7; z=z+8){
|
||||||
|
_mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec);
|
||||||
|
residuals = _mm256_i32gather_ps(&currResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,GRIDDIM)],offsets,2);
|
||||||
|
_mm256_storeu_ps(&lowerPhi0[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],residuals);
|
||||||
|
}
|
||||||
|
lowerPhi[solver_gauss_seidel_get_index(x,y,LOWERDIM - 1,LOWERDIM)] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
//set the last edge
|
||||||
|
//
|
||||||
|
for(z = 0; z < LOWERDIM-7; z=z+8){
|
||||||
|
_mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec);
|
||||||
|
}
|
||||||
|
z = LOWERDIM - 8;
|
||||||
|
_mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
//set last plane
|
||||||
|
//
|
||||||
|
x = LOWERDIM - 1;
|
||||||
|
for(y = 0; y < LOWERDIM; y++){
|
||||||
|
for(z = 0; z < LOWERDIM-7; z=z+8){
|
||||||
|
_mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec);
|
||||||
|
}
|
||||||
|
//zero out the end
|
||||||
|
z = LOWERDIM - 8;
|
||||||
|
_mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec);
|
||||||
|
}
|
||||||
|
|
||||||
|
//populate grid
|
||||||
|
for(x = 1; x < LOWERDIM - 1; x++){
|
||||||
|
for(y = 1; y < LOWERDIM - 1; y++){
|
||||||
|
for(z = 1; z < LOWERDIM - 1; z++){
|
||||||
|
//direct transfer operator (faster, lower accuracy)
|
||||||
|
lowerPhi0[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)] = currResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,GRIDDIM)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prolongates a lower grid into a higher grid
|
* Prolongates a lower grid into a higher grid
|
||||||
*/
|
*/
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user