parallel prolongation
This commit is contained in:
parent
66f7080890
commit
8e5d22426f
@ -60,6 +60,7 @@ void prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM)
|
|||||||
|
|
||||||
//parallelized operations
|
//parallelized operations
|
||||||
void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM);
|
void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM);
|
||||||
|
void prolongate_parallel(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Relaxes an ODE matrix by 1 iteration of multigrid method
|
* Relaxes an ODE matrix by 1 iteration of multigrid method
|
||||||
@ -171,7 +172,7 @@ float solver_multigrid_iterate_parallel_recursive(float * phi, float * phi0, flo
|
|||||||
}
|
}
|
||||||
|
|
||||||
//interpolate from the lower grid
|
//interpolate from the lower grid
|
||||||
prolongate_serial(phi,GRIDDIM,lowerPhi,LOWERDIM);
|
prolongate_parallel(phi,GRIDDIM,lowerPhi,LOWERDIM);
|
||||||
|
|
||||||
//smooth
|
//smooth
|
||||||
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM);
|
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM);
|
||||||
@ -321,7 +322,7 @@ void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, floa
|
|||||||
lowerPhi[solver_gauss_seidel_get_index(x,y,0,LOWERDIM)] = 0;
|
lowerPhi[solver_gauss_seidel_get_index(x,y,0,LOWERDIM)] = 0;
|
||||||
for(z = 1; z < LOWERDIM-7; z=z+8){
|
for(z = 1; z < LOWERDIM-7; z=z+8){
|
||||||
_mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec);
|
_mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec);
|
||||||
residuals = _mm256_i32gather_ps(&currResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,GRIDDIM)],offsets,2);
|
residuals = _mm256_i32gather_ps(&currResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,GRIDDIM)],offsets,sizeof(float));
|
||||||
_mm256_storeu_ps(&lowerPhi0[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],residuals);
|
_mm256_storeu_ps(&lowerPhi0[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],residuals);
|
||||||
}
|
}
|
||||||
lowerPhi[solver_gauss_seidel_get_index(x,y,LOWERDIM - 1,LOWERDIM)] = 0;
|
lowerPhi[solver_gauss_seidel_get_index(x,y,LOWERDIM - 1,LOWERDIM)] = 0;
|
||||||
@ -379,6 +380,31 @@ void prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prolongates a lower grid into a higher grid
|
||||||
|
*/
|
||||||
|
void prolongate_parallel(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM){
|
||||||
|
__m256i offsets = _mm256_set_epi32(0, 0, 1, 1, 2, 2, 3, 3);
|
||||||
|
__m256 lowerPhiVec;
|
||||||
|
__m256 phiVec;
|
||||||
|
int x, y, z;
|
||||||
|
for(int x = 1; x < GRIDDIM - 1; x++){
|
||||||
|
for(int y = 1; y < GRIDDIM - 1; y++){
|
||||||
|
for(int z = 1; z < GRIDDIM - 1; z=z+8){
|
||||||
|
phiVec = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(x,y,z,GRIDDIM)]);
|
||||||
|
lowerPhiVec = _mm256_i32gather_ps(&lowerPhi[solver_gauss_seidel_get_index(x/2,y/2,z/2,LOWERDIM)],offsets,sizeof(float));
|
||||||
|
_mm256_storeu_ps(
|
||||||
|
&phi[solver_gauss_seidel_get_index(x,y,z,GRIDDIM)],
|
||||||
|
_mm256_add_ps(
|
||||||
|
phiVec,
|
||||||
|
lowerPhiVec
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Verifies that all grids are allocated
|
* Verifies that all grids are allocated
|
||||||
*/
|
*/
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user